1 /**
2 *
3 * Copyright 2003-2006 The Apache Software Foundation
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package javax.mail.internet;
19
20 import java.io.UnsupportedEncodingException;
21 import java.lang.reflect.Array;
22 import java.util.ArrayList;
23 import java.util.List;
24
25 class AddressParser {
26
27
28 static public final int NONSTRICT = 0;
29 static public final int PARSE_HEADER = 1;
30 static public final int STRICT = 2;
31
32
33 static protected final int UNKNOWN = 0;
34 static protected final int ROUTE_ADDR = 1;
35 static protected final int GROUP_ADDR = 2;
36 static protected final int SIMPLE_ADDR = 3;
37
38
39 static protected final int END_OF_TOKENS = '\0';
40 static protected final int PERIOD = '.';
41 static protected final int LEFT_ANGLE = '<';
42 static protected final int RIGHT_ANGLE = '>';
43 static protected final int COMMA = ',';
44 static protected final int AT_SIGN = '@';
45 static protected final int SEMICOLON = ';';
46 static protected final int COLON = ':';
47 static protected final int QUOTED_LITERAL = '"';
48 static protected final int DOMAIN_LITERAL = '[';
49 static protected final int COMMENT = '(';
50 static protected final int ATOM = 'A';
51 static protected final int WHITESPACE = ' ';
52
53
54
55 private String addresses;
56
57 private int position;
58
59 private int end;
60
61 private int validationLevel;
62
/**
 * Create a parser for the given address string.
 *
 * @param addresses  The string holding the address (or address list) to parse.
 * @param validation The validation level: NONSTRICT, PARSE_HEADER or STRICT.
 */
public AddressParser(String addresses, int validation) {
    this.validationLevel = validation;
    this.addresses = addresses;
}
67
68
69 /**
70 * Parse an address list into an array of internet addresses.
71 *
72 * @return An array containing all of the non-null addresses in the list.
73 * @exception AddressException
74 * Thrown for any validation errors.
75 */
76 public InternetAddress[] parseAddressList() throws AddressException
77 {
78
79 TokenStream tokens = tokenizeAddress();
80
81
82 ArrayList addressList = new ArrayList();
83
84
85 while (true) {
86
87
88
89 addressList.addAll(parseSingleAddress(tokens, false));
90
91
92 AddressToken token = tokens.nextToken();
93 if (token.type == END_OF_TOKENS) {
94 break;
95 }
96 }
97
98 return (InternetAddress [])addressList.toArray(new InternetAddress[0]);
99 }
100
101
102 /**
103 * Parse a single internet address. This must be a single address,
104 * not an address list.
105 *
106 * @exception AddressException
107 */
108 public InternetAddress parseAddress() throws AddressException
109 {
110
111 TokenStream tokens = tokenizeAddress();
112
113
114
115
116 List addressList = parseSingleAddress(tokens, false);
117
118 if (addressList.isEmpty()) {
119 throw new AddressException("Null address", addresses, 0);
120 }
121
122 if (addressList.size() > 1) {
123 throw new AddressException("Illegal Address", addresses, 0);
124 }
125
126
127 AddressToken token = tokens.nextToken();
128 if (token.type != END_OF_TOKENS) {
129 illegalAddress("Illegal Address", token);
130 }
131
132 return (InternetAddress)addressList.get(0);
133 }
134
135
136 /**
137 * Validate an internet address. This must be a single address,
138 * not a list of addresses. The address also must not contain
139 * and personal information to be valid.
140 *
141 * @exception AddressException
142 */
143 public void validateAddress() throws AddressException
144 {
145
146 TokenStream tokens = tokenizeAddress();
147
148
149
150
151 List addressList = parseSingleAddress(tokens, false);
152 if (addressList.isEmpty()) {
153 throw new AddressException("Null address", addresses, 0);
154 }
155
156
157 if (addressList.size() > 1) {
158 throw new AddressException("Illegal Address", addresses, 0);
159 }
160
161 InternetAddress address = (InternetAddress)addressList.get(0);
162
163
164
165 if (address.personal != null) {
166 throw new AddressException("Illegal Address", addresses, 0);
167 }
168
169 AddressToken token = tokens.nextToken();
170 if (token.type != END_OF_TOKENS) {
171 illegalAddress("Illegal Address", token);
172 }
173 }
174
175
176 /**
177 * Extract the set of address from a group Internet specification.
178 *
179 * @return An array containing all of the non-null addresses in the list.
180 * @exception AddressException
181 */
182 public InternetAddress[] extractGroupList() throws AddressException
183 {
184
185 TokenStream tokens = tokenizeAddress();
186
187
188 ArrayList addresses = new ArrayList();
189
190 AddressToken token = tokens.nextToken();
191
192
193
194 while (token.type != COLON) {
195 if (token.type == END_OF_TOKENS) {
196 illegalAddress("Missing ':'", token);
197 }
198 token = tokens.nextToken();
199 }
200
201
202 while (true) {
203
204
205
206 addresses.addAll(parseSingleAddress(tokens, true));
207
208
209 token = tokens.nextToken();
210 if (token.type == SEMICOLON) {
211 break;
212 }
213 else if (token.type == END_OF_TOKENS) {
214 illegalAddress("Missing ';'", token);
215 }
216 }
217
218 return (InternetAddress [])addresses.toArray(new InternetAddress[0]);
219 }
220
221
222 /**
223 * Parse out a single address from a string from a string
224 * of address tokens, returning an InternetAddress object that
225 * represents the address.
226 *
227 * @param tokens The token source for this address.
228 *
229 * @return A parsed out and constructed InternetAddress object for
230 * the next address. Returns null if this is an "empty"
231 * address in a list.
232 * @exception AddressException
233 */
234 private List parseSingleAddress(TokenStream tokens, boolean inGroup) throws AddressException
235 {
236 List parsedAddresses = new ArrayList();
237
238
239 AddressToken personalStart = null;
240 AddressToken personalEnd = null;
241
242
243 AddressToken addressStart = null;
244 AddressToken addressEnd = null;
245
246
247
248
249 boolean nonStrictRules = true;
250
251
252 int addressType = UNKNOWN;
253
254
255
256
257
258
259
260
261
262 AddressToken first = tokens.nextToken();
263
264 tokens.pushToken(first);
265
266
267 while (addressType == UNKNOWN) {
268
269 AddressToken token = tokens.nextToken();
270 switch (token.type) {
271
272
273
274 case COMMENT:
275
276 nonStrictRules = false;
277 break;
278
279
280
281 case SEMICOLON:
282 if (inGroup) {
283
284 tokens.pushToken(token);
285
286
287 if (addressStart == null) {
288
289 return parsedAddresses;
290 }
291
292 addressEnd = tokens.previousToken(token);
293
294
295 personalStart = null;
296
297 addressType = SIMPLE_ADDR;
298 break;
299 }
300
301
302
303
304
305
306 case DOMAIN_LITERAL:
307 case QUOTED_LITERAL:
308
309 nonStrictRules = false;
310
311 case ATOM:
312 case AT_SIGN:
313 case PERIOD:
314
315
316 if (addressStart == null) {
317 if (personalStart == null) {
318 personalStart = token;
319 }
320
321
322
323
324 addressStart = token;
325 }
326 break;
327
328
329
330 case LEFT_ANGLE:
331
332 nonStrictRules = false;
333
334 addressType = ROUTE_ADDR;
335
336
337 addressStart = tokens.nextRealToken();
338
339 tokens.pushToken(addressStart);
340
341 if (personalStart != null) {
342 personalEnd = tokens.previousToken(token);
343 }
344
345 addressEnd = scanRouteAddress(tokens, false);
346 break;
347
348
349 case COLON:
350
351 nonStrictRules = false;
352
353
354 if (inGroup) {
355 illegalAddress("Nested group element", token);
356 }
357 addressType = GROUP_ADDR;
358
359 personalStart = null;
360
361 addressStart = first;
362 addressEnd = scanGroupAddress(tokens);
363 break;
364
365
366
367
368
369
370 case END_OF_TOKENS:
371
372
373 if (inGroup) {
374 illegalAddress("Missing ';'", token);
375 }
376
377
378
379
380 case COMMA:
381
382 tokens.pushToken(token);
383
384
385 if (addressStart == null) {
386
387 return parsedAddresses;
388 }
389
390 addressEnd = tokens.previousToken(token);
391
392
393 personalStart = null;
394
395 addressType = SIMPLE_ADDR;
396 break;
397
398
399
400 case RIGHT_ANGLE:
401 illegalAddress("Unexpected '>'", token);
402
403 }
404 }
405
406 String personal = null;
407
408
409 if (personalStart != null) {
410 TokenStream personalTokens = tokens.section(personalStart, personalEnd);
411 personal = personalToString(personalTokens);
412 }
413
414
415 else {
416 if (addressType == SIMPLE_ADDR && first.type == COMMENT) {
417 personal = first.value;
418 }
419 }
420
421 TokenStream addressTokens = tokens.section(addressStart, addressEnd);
422
423
424
425
426 if (validationLevel != PARSE_HEADER) {
427 switch (addressType) {
428 case GROUP_ADDR:
429 validateGroup(addressTokens);
430 break;
431
432 case ROUTE_ADDR:
433 validateRouteAddr(addressTokens, false);
434 break;
435
436 case SIMPLE_ADDR:
437
438 validateSimpleAddress(addressTokens);
439 break;
440 }
441 }
442
443
444
445 if (validationLevel != NONSTRICT || addressType != SIMPLE_ADDR || !nonStrictRules) {
446
447
448 addressTokens.reset();
449 String address = addressToString(addressTokens);
450
451
452 InternetAddress result = new InternetAddress();
453 result.setAddress(address);
454 try {
455 result.setPersonal(personal);
456 } catch (UnsupportedEncodingException e) {
457 }
458
459
460 parsedAddresses.add(result);
461 return parsedAddresses;
462 }
463 else {
464 addressTokens.reset();
465
466 TokenStream nextAddress = addressTokens.getBlankDelimitedToken();
467 while (nextAddress != null) {
468 String address = addressToString(nextAddress);
469
470 InternetAddress result = new InternetAddress();
471 result.setAddress(address);
472 parsedAddresses.add(result);
473 nextAddress = addressTokens.getBlankDelimitedToken();
474 }
475 return parsedAddresses;
476 }
477 }
478
479
480 /**
481 * Scan the token stream, parsing off a route addr spec. This
482 * will do some basic syntax validation, but will not actually
483 * validate any of the address information. Comments will be
484 * discarded.
485 *
486 * @param tokens The stream of tokens.
487 *
488 * @return The last token of the route address (the one preceeding the
489 * terminating '>'.
490 */
491 private AddressToken scanRouteAddress(TokenStream tokens, boolean inGroup) throws AddressException {
492
493 AddressToken token = tokens.nextRealToken();
494
495
496
497 AddressToken previous = null;
498
499
500
501 boolean inRoute = token.type == AT_SIGN;
502
503
504 while (true) {
505 switch (token.type) {
506
507 case ATOM:
508 case QUOTED_LITERAL:
509 case DOMAIN_LITERAL:
510 case PERIOD:
511 case AT_SIGN:
512 break;
513
514 case COLON:
515
516 if (!inRoute) {
517 illegalAddress("Unexpected ':'", token);
518 }
519
520 inRoute = false;
521 break;
522
523 case COMMA:
524
525 if (!inRoute) {
526 illegalAddress("Unexpected ','", token);
527 }
528 break;
529
530 case RIGHT_ANGLE:
531
532 if (previous == null) {
533 illegalAddress("Illegal address", token);
534 }
535
536
537 token = tokens.nextRealToken();
538
539 if (inGroup) {
540 if (token.type != COMMA && token.type != SEMICOLON) {
541 illegalAddress("Illegal address", token);
542 }
543 }
544
545 else {
546 if (token.type != COMMA && token.type != END_OF_TOKENS) {
547 illegalAddress("Illegal address", token);
548 }
549 }
550
551 tokens.pushToken(token);
552
553 return previous;
554
555 case END_OF_TOKENS:
556 illegalAddress("Missing '>'", token);
557
558
559 case SEMICOLON:
560 illegalAddress("Unexpected ';'", token);
561
562 case LEFT_ANGLE:
563 illegalAddress("Unexpected '<'", token);
564 }
565
566 previous = token;
567 token = tokens.nextRealToken();
568 }
569 }
570
571
572 /**
573 * Scan the token stream, parsing off a group address. This
574 * will do some basic syntax validation, but will not actually
575 * validate any of the address information. Comments will be
576 * ignored.
577 *
578 * @param tokens The stream of tokens.
579 *
580 * @return The last token of the group address (the terminating ':").
581 */
582 private AddressToken scanGroupAddress(TokenStream tokens) throws AddressException {
583
584
585 AddressToken token = tokens.nextRealToken();
586
587
588 while (true) {
589 switch (token.type) {
590
591 case ATOM:
592 case QUOTED_LITERAL:
593 case DOMAIN_LITERAL:
594 case PERIOD:
595 case AT_SIGN:
596 case COMMA:
597 break;
598
599 case COLON:
600 illegalAddress("Nested group", token);
601
602
603
604 case LEFT_ANGLE:
605 scanRouteAddress(tokens, true);
606 break;
607
608
609 case END_OF_TOKENS:
610 illegalAddress("Missing ';'", token);
611
612
613 case SEMICOLON:
614
615 AddressToken next = tokens.nextRealToken();
616 if (next.type != COMMA && next.type != END_OF_TOKENS) {
617 illegalAddress("Illegal address", token);
618 }
619
620 tokens.pushToken(next);
621 return token;
622
623 case RIGHT_ANGLE:
624 illegalAddress("Unexpected '>'", token);
625 }
626 token = tokens.nextRealToken();
627 }
628 }
629
630
631 /**
632 * Parse the provided internet address into a set of tokens. This
633 * phase only does a syntax check on the tokens. The interpretation
634 * of the tokens is the next phase.
635 *
636 * @exception AddressException
637 */
638 private TokenStream tokenizeAddress() throws AddressException {
639
640
641 TokenStream tokens = new TokenStream();
642
643 end = addresses.length();
644
645
646 while (moreCharacters()) {
647 char ch = currentChar();
648
649 switch (ch) {
650
651 case '(':
652 scanComment(tokens);
653 break;
654
655 case ')':
656 syntaxError("Unexpected ')'", position);
657
658
659
660 case '"':
661 scanQuotedLiteral(tokens);
662 break;
663
664 case '[':
665 scanDomainLiteral(tokens);
666 break;
667
668
669 case ']':
670 syntaxError("Unexpected ']'", position);
671
672
673 case '<':
674 tokens.addToken(new AddressToken(LEFT_ANGLE, position));
675 nextChar();
676 break;
677
678
679
680 case '>':
681 tokens.addToken(new AddressToken(RIGHT_ANGLE, position));
682 nextChar();
683 break;
684 case ':':
685 tokens.addToken(new AddressToken(COLON, position));
686 nextChar();
687 break;
688 case ',':
689 tokens.addToken(new AddressToken(COMMA, position));
690 nextChar();
691 break;
692 case '.':
693 tokens.addToken(new AddressToken(PERIOD, position));
694 nextChar();
695 break;
696 case ';':
697 tokens.addToken(new AddressToken(SEMICOLON, position));
698 nextChar();
699 break;
700 case '@':
701 tokens.addToken(new AddressToken(AT_SIGN, position));
702 nextChar();
703 break;
704
705
706
707
708 case ' ':
709 case '\t':
710 case '\r':
711 case '\n':
712
713 tokens.addToken(new AddressToken(WHITESPACE, position));
714
715 nextChar();
716
717
718 while (moreCharacters()) {
719 char nextChar = currentChar();
720 if (nextChar == ' ' || nextChar == '\t' || nextChar == '\r' || nextChar == '\n') {
721 nextChar();
722 }
723 else {
724 break;
725 }
726 }
727 break;
728
729
730
731 default:
732 if (ch < 040 || ch >= 0177) {
733 syntaxError("Illegal character in address", position);
734 }
735
736 scanAtom(tokens);
737 break;
738 }
739 }
740
741
742 tokens.addToken(new AddressToken(END_OF_TOKENS, addresses.length()));
743 return tokens;
744 }
745
746
747 /**
748 * Step to the next character position while parsing.
749 */
750 private void nextChar() {
751 position++;
752 }
753
754
755 /**
756 * Retrieve the character at the current parsing position.
757 *
758 * @return The current character.
759 */
760 private char currentChar() {
761 return addresses.charAt(position);
762 }
763
764 /**
765 * Test if there are more characters left to parse.
766 *
767 * @return True if we've hit the last character, false otherwise.
768 */
769 private boolean moreCharacters() {
770 return position < end;
771 }
772
773
774 /**
775 * Parse a quoted string as specified by the RFC822 specification.
776 *
777 * @param tokens The TokenStream where the parsed out token is added.
778 */
779 private void scanQuotedLiteral(TokenStream tokens) throws AddressException {
780 StringBuffer value = new StringBuffer();
781
782
783 int startPosition = position;
784
785 nextChar();
786
787 while (moreCharacters()) {
788 char ch = currentChar();
789
790
791 if (ch == '\\') {
792
793 nextChar();
794 if (!moreCharacters()) {
795 syntaxError("Missing '\"'", position);
796 }
797 value.append(currentChar());
798 }
799
800 else if (ch == '"') {
801
802 tokens.addToken(new AddressToken(value.toString(), QUOTED_LITERAL, position));
803
804 nextChar();
805 return;
806 }
807
808 else if (ch == '\r') {
809 syntaxError("Illegal line end in literal", position);
810 }
811 else
812 {
813 value.append(ch);
814 }
815 nextChar();
816 }
817
818 syntaxError("Missing '\"'", position);
819 }
820
821
822 /**
823 * Parse a domain literal as specified by the RFC822 specification.
824 *
825 * @param tokens The TokenStream where the parsed out token is added.
826 */
827 private void scanDomainLiteral(TokenStream tokens) throws AddressException {
828 StringBuffer value = new StringBuffer();
829
830 int startPosition = position;
831
832 nextChar();
833
834 while (moreCharacters()) {
835 char ch = currentChar();
836
837
838 if (ch == '\\') {
839
840
841
842 value.append(currentChar());
843
844 nextChar();
845 if (!moreCharacters()) {
846 syntaxError("Missing '\"'", position);
847 }
848 value.append(currentChar());
849 }
850
851 else if (ch == ']') {
852
853 tokens.addToken(new AddressToken(value.toString(), DOMAIN_LITERAL, startPosition));
854
855 nextChar();
856 return;
857 }
858
859 else if (ch == '[') {
860 syntaxError("Unexpected '['", position);
861 }
862
863 else if (ch == '\r') {
864 syntaxError("Illegal line end in domain literal", position);
865 }
866 else
867 {
868 value.append(ch);
869 }
870 nextChar();
871 }
872
873 syntaxError("Missing ']'", position);
874 }
875
876 /**
877 * Scan an atom in an internet address, using the RFC822 rules
878 * for atom delimiters.
879 *
880 * @param tokens The TokenStream where the parsed out token is added.
881 */
882 private void scanAtom(TokenStream tokens) throws AddressException {
883 int start = position;
884 nextChar();
885 while (moreCharacters()) {
886
887 char ch = currentChar();
888 if (isAtom(ch)) {
889 nextChar();
890 }
891 else {
892 break;
893 }
894 }
895
896
897 tokens.addToken(new AddressToken(addresses.substring(start, position), ATOM, start));
898 }
899
900
901 /**
902 * Parse an internet address comment field as specified by
903 * RFC822. Includes support for quoted characters and nesting.
904 *
905 * @param tokens The TokenStream where the parsed out token is added.
906 */
907 private void scanComment(TokenStream tokens) throws AddressException {
908 StringBuffer value = new StringBuffer();
909
910 int startPosition = position;
911
912 nextChar();
913
914
915 int nest = 1;
916
917
918 while (moreCharacters()) {
919 char ch = currentChar();
920
921 if (ch == '\\') {
922
923
924 nextChar();
925 if (!moreCharacters()) {
926 syntaxError("Missing ')'", position);
927 }
928 value.append(currentChar());
929 }
930
931 else if (ch == '(') {
932
933
934 nest++;
935 value.append(ch);
936 }
937
938 else if (ch == ')') {
939
940
941 nest--;
942 if (nest > 0) {
943 value.append(ch);
944 }
945 else {
946
947
948
949 nextChar();
950 tokens.addToken(new AddressToken(value.toString(), COMMENT, startPosition));
951 return;
952 }
953 }
954 else if (ch == '\r') {
955 syntaxError("Illegal line end in comment", position);
956 }
957 else {
958 value.append(ch);
959 }
960
961 nextChar();
962 }
963
964 syntaxError("Missing ')'", position);
965 }
966
967
968 /**
969 * Validate the syntax of an RFC822 group internet address specification.
970 *
971 * @param tokens The stream of tokens for the address.
972 *
973 * @exception AddressException
974 */
975 private void validateGroup(TokenStream tokens) throws AddressException {
976
977
978
979 int phraseCount = 0;
980
981 AddressToken token = tokens.nextRealToken();
982
983 while (token.type != COLON) {
984
985 if (token.type != ATOM && token.type != QUOTED_LITERAL) {
986 invalidToken(token);
987 }
988 phraseCount++;
989 token = tokens.nextRealToken();
990 }
991
992
993
994 if (phraseCount == 0) {
995 illegalAddress("Missing group identifier phrase", token);
996 }
997
998
999
1000
1001
1002 while (true) {
1003
1004
1005 validateGroupMailbox(tokens);
1006
1007 token = tokens.nextRealToken();
1008
1009
1010 if (token.type == SEMICOLON) {
1011 token = tokens.nextRealToken();
1012 if (token.type != END_OF_TOKENS) {
1013 illegalAddress("Illegal group address", token);
1014 }
1015 return;
1016 }
1017
1018
1019 else if (token.type != COMMA) {
1020 illegalAddress("Illegal group address", token);
1021 }
1022 }
1023 }
1024
1025
1026 /**
1027 * Validate the syntax of single mailbox within a group address.
1028 *
1029 * @param tokens The stream of tokens representing the address.
1030 *
1031 * @exception AddressException
1032 */
1033 private void validateGroupMailbox(TokenStream tokens) throws AddressException {
1034 AddressToken first = tokens.nextRealToken();
1035
1036 if (first.type == COMMA || first.type == SEMICOLON) {
1037 tokens.pushToken(first);
1038 return;
1039 }
1040
1041
1042 AddressToken token = first;
1043
1044
1045
1046 while (first != null) {
1047 switch (token.type) {
1048
1049 case QUOTED_LITERAL:
1050 case ATOM:
1051 break;
1052
1053
1054
1055 case LEFT_ANGLE:
1056 tokens.pushToken(first);
1057 validatePhrase(tokens, false);
1058 validateRouteAddr(tokens, true);
1059 return;
1060
1061
1062
1063 case PERIOD:
1064
1065
1066 case AT_SIGN:
1067 tokens.pushToken(first);
1068 validateAddressSpec(tokens);
1069 return;
1070
1071
1072
1073 case COMMA:
1074
1075 case SEMICOLON:
1076 tokens.pushToken(first);
1077 validateAddressSpec(tokens);
1078 return;
1079
1080 case END_OF_TOKENS:
1081 illegalAddress("Missing ';'", token);
1082
1083 }
1084 token = tokens.nextRealToken();
1085 }
1086 }
1087
1088
1089 /**
1090 * Utility method for throwing an AddressException caused by an
1091 * unexpected primitive token.
1092 *
1093 * @param token The token causing the problem (must not be a value type token).
1094 *
1095 * @exception AddressException
1096 */
1097 private void invalidToken(AddressToken token) throws AddressException {
1098 illegalAddress("Unexpected '" + token.type + "'", token);
1099 }
1100
1101
1102 /**
1103 * Raise an error about illegal syntax.
1104 *
1105 * @param message The message used in the thrown exception.
1106 * @param position The parsing position within the string.
1107 *
1108 * @exception AddressException
1109 */
1110 private void syntaxError(String message, int position) throws AddressException
1111 {
1112 throw new AddressException(message, addresses, position);
1113 }
1114
1115
1116 /**
1117 * Throw an exception based on the position of an invalid token.
1118 *
1119 * @param message The exception message.
1120 * @param token The token causing the error. This tokens position is used
1121 * in the exception information.
1122 */
1123 private void illegalAddress(String message, AddressToken token) throws AddressException {
1124 throw new AddressException(message, addresses, token.position);
1125 }
1126
1127
1128 /**
1129 * Validate that a required phrase exists.
1130 *
1131 * @param tokens The set of tokens to validate. positioned at the phrase start.
1132 * @param required A flag indicating whether the phrase is optional or required.
1133 *
1134 * @exception AddressException
1135 */
1136 private void validatePhrase(TokenStream tokens, boolean required) throws AddressException {
1137
1138
1139 AddressToken token = tokens.nextRealToken();
1140 if (token.type != ATOM && token.type != QUOTED_LITERAL) {
1141 if (required) {
1142 illegalAddress("Missing group phrase", token);
1143 }
1144 }
1145
1146
1147 token = tokens.nextRealToken();
1148 while (token.type == ATOM || token.type == QUOTED_LITERAL) {
1149 token = tokens.nextRealToken();
1150 }
1151 }
1152
1153
1154 /**
1155 * validate a routeaddr specification
1156 *
1157 * @param tokens The tokens representing the address portion (personal information
1158 * already removed).
1159 * @param ingroup true indicates we're validating a route address inside a
1160 * group list. false indicates we're validating a standalone
1161 * address.
1162 *
1163 * @exception AddressException
1164 */
1165 private void validateRouteAddr(TokenStream tokens, boolean ingroup) throws AddressException {
1166
1167 AddressToken token = tokens.nextRealToken();
1168
1169 if (token.type == AT_SIGN) {
1170
1171 tokens.pushToken(token);
1172 validateRoute(tokens);
1173 }
1174 else {
1175
1176 tokens.pushToken(token);
1177 }
1178
1179
1180 validateAddressSpec(tokens);
1181
1182 token = tokens.nextRealToken();
1183 if (ingroup) {
1184
1185
1186 if (token.type != RIGHT_ANGLE) {
1187 illegalAddress("Missing '>'", token);
1188 }
1189 }
1190 else {
1191
1192
1193 if (token.type != END_OF_TOKENS) {
1194 illegalAddress("Illegal Address", token);
1195 }
1196 }
1197 }
1198
1199
1200
1201 /**
1202 * Validate a simple address in the form "user@domain".
1203 *
1204 * @param tokens The stream of tokens representing the address.
1205 */
1206 private void validateSimpleAddress(TokenStream tokens) throws AddressException {
1207
1208
1209
1210
1211 validateAddressSpec(tokens);
1212
1213
1214 AddressToken token = tokens.nextRealToken();
1215 if (token.type != END_OF_TOKENS) {
1216 illegalAddress("Illegal Address", token);
1217 }
1218 }
1219
1220 /**
1221 * Validate the addr-spec portion of an address. RFC822 requires
1222 * this be of the form "local-part@domain". However, javamail also
1223 * allows simple address of the form "local-part". We only require
1224 * the domain if an '@' is encountered.
1225 *
1226 * @param tokens
1227 */
1228 private void validateAddressSpec(TokenStream tokens) throws AddressException {
1229
1230 validateLocalPart(tokens);
1231
1232
1233 AddressToken token = tokens.nextRealToken();
1234 if (token.type == AT_SIGN) {
1235 validateDomain(tokens);
1236 }
1237 else {
1238
1239 tokens.pushToken(token);
1240 }
1241
1242 }
1243
1244
1245 /**
1246 * Validate the route portion of a route-addr. This is a list
1247 * of domain values in the form 1#("@" domain) ":".
1248 *
1249 * @param tokens The token stream holding the address information.
1250 */
1251 private void validateRoute(TokenStream tokens) throws AddressException {
1252 while (true) {
1253 AddressToken token = tokens.nextRealToken();
1254
1255 if (token.type == AT_SIGN) {
1256 validateDomain(tokens);
1257 }
1258
1259 else if (token.type == COMMA) {
1260 continue;
1261 }
1262
1263 else if (token.type == COLON) {
1264 return;
1265 }
1266
1267 else {
1268 illegalAddress("Missing ':'", token);
1269 }
1270 }
1271 }
1272
1273
1274 /**
1275 * Parse the local part of an address spec. The local part
1276 * is a series of "words" separated by ".".
1277 */
1278 private void validateLocalPart(TokenStream tokens) throws AddressException {
1279 while (true) {
1280
1281 AddressToken token = tokens.nextRealToken();
1282
1283
1284 if (token.type != ATOM && token.type != QUOTED_LITERAL) {
1285 illegalAddress("Invalid local part", token);
1286 }
1287
1288
1289 token = tokens.nextRealToken();
1290
1291 if (token.type != PERIOD) {
1292 tokens.pushToken(token);
1293
1294 return;
1295 }
1296 }
1297 }
1298
1299
1300
1301 /**
1302 * Parse a domain name of the form sub-domain *("." sub-domain).
1303 * a sub-domain is either an atom or a domain-literal.
1304 */
1305 private void validateDomain(TokenStream tokens) throws AddressException {
1306 while (true) {
1307
1308 AddressToken token = tokens.nextRealToken();
1309
1310
1311 if (token.type != ATOM && token.type != DOMAIN_LITERAL) {
1312 illegalAddress("Invalid domain", token);
1313 }
1314
1315
1316 token = tokens.nextRealToken();
1317
1318 if (token.type != PERIOD) {
1319
1320 tokens.pushToken(token);
1321 return;
1322 }
1323 }
1324 }
1325
1326 /**
1327 * Convert a list of word tokens into a phrase string. The
1328 * rules for this are a little hard to puzzle out, but there
1329 * is a logic to it. If the list is empty, the phrase is
1330 * just a null value.
1331 *
1332 * If we have a phrase, then the quoted strings need to
1333 * handled appropriately. In multi-token phrases, the
1334 * quoted literals are concatenated with the quotes intact,
1335 * regardless of content. Thus a phrase that comes in like this:
1336 *
1337 * "Geronimo" Apache
1338 *
1339 * gets converted back to the same string.
1340 *
1341 * If there is just a single token in the phrase, AND the token
1342 * is a quoted string AND the string does not contain embedded
1343 * special characters ("\.,@<>()[]:;), then the phrase
1344 * is expressed as an atom. Thus the literal
1345 *
1346 * "Geronimo"
1347 *
1348 * becomes
1349 *
1350 * Geronimo
1351 *
1352 * but
1353 *
1354 * "(Geronimo)"
1355 *
1356 * remains
1357 *
1358 * "(Geronimo)"
1359 *
1360 * Note that we're generating a canonical form of the phrase,
1361 * which removes comments and reduces linear whitespace down
1362 * to a single separator token.
1363 *
1364 * @param phrase An array list of phrase tokens (which may be empty).
1365 */
1366 private String personalToString(TokenStream tokens) {
1367
1368
1369 AddressToken token = tokens.nextToken();
1370
1371 if (token.type == END_OF_TOKENS) {
1372 return null;
1373 }
1374
1375 AddressToken next = tokens.nextToken();
1376
1377
1378 if (next.type == END_OF_TOKENS) {
1379
1380
1381 return token.value;
1382 }
1383
1384
1385 tokens.pushToken(token);
1386
1387
1388 StringBuffer buffer = new StringBuffer();
1389
1390
1391 token = tokens.nextToken();
1392 addTokenValue(token, buffer);
1393
1394 token = tokens.nextToken();
1395 while (token.type != END_OF_TOKENS) {
1396
1397 buffer.append(' ');
1398
1399 addTokenValue(token, buffer);
1400 token = tokens.nextToken();
1401 }
1402
1403 return buffer.toString();
1404 }
1405
1406
1407 /**
1408 * take a canonicalized set of address tokens and reformat it back into a string value,
1409 * inserting whitespace where appropriate.
1410 *
1411 * @param tokens The set of tokens representing the address.
1412 *
1413 * @return The string value of the tokens.
1414 */
1415 private String addressToString(TokenStream tokens) {
1416 StringBuffer buffer = new StringBuffer();
1417
1418
1419
1420
1421
1422 boolean spaceRequired = false;
1423
1424
1425 AddressToken token = tokens.nextToken();
1426
1427
1428 while (token.type != END_OF_TOKENS) {
1429 switch (token.type) {
1430
1431
1432 case ATOM:
1433 case QUOTED_LITERAL:
1434
1435 if (spaceRequired) {
1436 buffer.append(' ');
1437 }
1438 addTokenValue(token, buffer);
1439
1440 spaceRequired = true;
1441 break;
1442
1443
1444
1445
1446 case LEFT_ANGLE:
1447 case RIGHT_ANGLE:
1448 case COMMA:
1449 case COLON:
1450 case AT_SIGN:
1451 case SEMICOLON:
1452 case PERIOD:
1453 buffer.append((char)token.type);
1454
1455 spaceRequired = false;
1456 break;
1457
1458
1459 case DOMAIN_LITERAL:
1460 addTokenValue(token, buffer);
1461 spaceRequired = false;
1462 break;
1463
1464
1465 case COMMENT:
1466 addTokenValue(token, buffer);
1467 spaceRequired = false;
1468 break;
1469 }
1470 token = tokens.nextToken();
1471 }
1472 return buffer.toString();
1473 }
1474
1475
1476 /**
1477 * Append a value token on to a string buffer used to create
1478 * the canonicalized string value.
1479 *
1480 * @param token The token we're adding.
1481 * @param buffer The target string buffer.
1482 */
1483 private void addTokenValue(AddressToken token, StringBuffer buffer) {
1484
1485 if (token.type == ATOM) {
1486 buffer.append(token.value);
1487 }
1488
1489 else if (token.type == QUOTED_LITERAL) {
1490 buffer.append(formatQuotedString(token.value));
1491 }
1492
1493 else if (token.type == DOMAIN_LITERAL) {
1494 buffer.append('[');
1495 buffer.append(token.value);
1496 buffer.append(']');
1497 }
1498
1499 else if (token.type == COMMENT) {
1500 buffer.append('(');
1501 buffer.append(token.value);
1502 buffer.append(')');
1503 }
1504 }
1505
1506
1507
1508 private static final byte[] CHARMAP = {
1509 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x06, 0x02, 0x06, 0x02, 0x02, 0x06, 0x02, 0x02,
1510 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
1511 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00,
1512 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x00, 0x01, 0x00,
1513
1514 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1515 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x00, 0x00,
1516 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1517 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
1518 };
1519
1520 private static final byte FLG_SPECIAL = 1;
1521 private static final byte FLG_CONTROL = 2;
1522 private static final byte FLG_SPACE = 4;
1523
1524 private static boolean isSpace(char ch) {
1525 if (ch > '\u007f') {
1526 return false;
1527 } else {
1528 return (CHARMAP[ch] & FLG_SPACE) != 0;
1529 }
1530 }
1531
1532 /**
1533 * Quick test to see if a character is an allowed atom character
1534 * or not.
1535 *
1536 * @param ch The test character.
1537 *
1538 * @return true if this character is allowed in atoms, false for any
1539 * control characters, special characters, or blanks.
1540 */
1541 public static boolean isAtom(char ch) {
1542 if (ch > '\u007f') {
1543 return false;
1544 }
1545 else if (ch == ' ') {
1546 return false;
1547 }
1548 else {
1549 return (CHARMAP[ch] & (FLG_SPECIAL | FLG_CONTROL)) == 0;
1550 }
1551 }
1552
1553 /**
1554 * Tests one string to determine if it contains any of the
1555 * characters in a supplied test string.
1556 *
1557 * @param s The string we're testing.
1558 * @param chars The set of characters we're testing against.
1559 *
1560 * @return true if any of the characters is found, false otherwise.
1561 */
1562 public static boolean containsCharacters(String s, String chars)
1563 {
1564 for (int i = 0; i < s.length(); i++) {
1565 if (chars.indexOf(s.charAt(i)) >= 0) {
1566 return true;
1567 }
1568 }
1569 return false;
1570 }
1571
1572
1573 /**
1574 * Tests if a string contains any non-special characters that
1575 * would require encoding the value as a quoted string rather
1576 * than a simple atom value.
1577 *
1578 * @param s The test string.
1579 *
1580 * @return True if the string contains only blanks or allowed atom
1581 * characters.
1582 */
1583 public static boolean containsSpecials(String s)
1584 {
1585 for (int i = 0; i < s.length(); i++) {
1586 char ch = s.charAt(i);
1587
1588 if (ch == ' ' || isAtom(ch)) {
1589 continue;
1590 }
1591 else {
1592 return true;
1593 }
1594 }
1595 return false;
1596 }
1597
1598
1599 /**
1600 * Tests if a string contains any non-special characters that
1601 * would require encoding the value as a quoted string rather
1602 * than a simple atom value.
1603 *
1604 * @param s The test string.
1605 *
1606 * @return True if the string contains only blanks or allowed atom
1607 * characters.
1608 */
1609 public static boolean isAtom(String s)
1610 {
1611 for (int i = 0; i < s.length(); i++) {
1612 char ch = s.charAt(i);
1613
1614 if (!isAtom(ch)) {
1615 return false;
1616 }
1617 }
1618 return true;
1619 }
1620
1621 /**
1622 * Apply RFC822 quoting rules to a literal string value. This
1623 * will search the string to see if there are any characters that
1624 * require special escaping, and apply the escapes. If the
1625 * string is just a string of blank-delimited atoms, the string
1626 * value is returned without quotes.
1627 *
1628 * @param s The source string.
1629 *
1630 * @return A version of the string as a valid RFC822 quoted literal.
1631 */
1632 public static String quoteString(String s) {
1633
1634
1635
1636 if (s.indexOf('\\') == -1 && s.indexOf('"') == -1) {
1637
1638 if (!containsSpecials(s)) {
1639 return s;
1640 }
1641 StringBuffer buffer = new StringBuffer(s.length() + 2);
1642 buffer.append('"');
1643 buffer.append(s);
1644 buffer.append('"');
1645 return buffer.toString();
1646 }
1647
1648
1649
1650 StringBuffer buffer = new StringBuffer(s.length() + 10);
1651 buffer.append('"');
1652
1653
1654 for (int i = 0; i < s.length(); i++) {
1655 char ch = s.charAt(i);
1656
1657 if (ch == '\\' || ch == '"') {
1658
1659 buffer.append('\\');
1660 }
1661
1662 buffer.append(ch);
1663 }
1664 buffer.append('"');
1665 return buffer.toString();
1666 }
1667
1668 /**
1669 * Apply RFC822 quoting rules to a literal string value. This
1670 * will search the string to see if there are any characters that
1671 * require special escaping, and apply the escapes. The returned
1672 * value is enclosed in quotes.
1673 *
1674 * @param s The source string.
1675 *
1676 * @return A version of the string as a valid RFC822 quoted literal.
1677 */
1678 public static String formatQuotedString(String s) {
1679
1680
1681 if (s.indexOf('\\') == -1 && s.indexOf('"') == -1) {
1682 StringBuffer buffer = new StringBuffer(s.length() + 2);
1683 buffer.append('"');
1684 buffer.append(s);
1685 buffer.append('"');
1686 return buffer.toString();
1687 }
1688
1689
1690
1691 StringBuffer buffer = new StringBuffer(s.length() + 10);
1692 buffer.append('"');
1693
1694
1695 for (int i = 0; i < s.length(); i++) {
1696 char ch = s.charAt(i);
1697
1698 if (ch == '\\' || ch == '"') {
1699
1700 buffer.append('\\');
1701 }
1702
1703 buffer.append(ch);
1704 }
1705 buffer.append('"');
1706 return buffer.toString();
1707 }
1708
1709 public class TokenStream {
1710
1711 private List tokens;
1712
1713
1714 int currentToken = 0;
1715
1716
1717 /**
1718 * Default constructor for a TokenStream. This creates an
1719 * empty TokenStream for purposes of tokenizing an address.
1720 * It is the creator's responsibility to terminate the stream
1721 * with a terminator token.
1722 */
1723 public TokenStream() {
1724 tokens = new ArrayList();
1725 }
1726
1727
1728 /**
1729 * Construct a TokenStream from a list of tokens. A terminator
1730 * token is added to the end.
1731 *
1732 * @param tokens An existing token list.
1733 */
1734 public TokenStream(List tokens) {
1735 this.tokens = tokens;
1736 tokens.add(new AddressToken(END_OF_TOKENS, -1));
1737 }
1738
1739 /**
1740 * Add an address token to the token list.
1741 *
1742 * @param t The new token to add to the list.
1743 */
1744 public void addToken(AddressToken token) {
1745 tokens.add(token);
1746 }
1747
1748 /**
1749 * Get the next token at the cursor position, advancing the
1750 * position accordingly.
1751 *
1752 * @return The token at the current token position.
1753 */
1754 public AddressToken nextToken() {
1755 AddressToken token = (AddressToken)tokens.get(currentToken++);
1756
1757
1758 while (token.type == WHITESPACE) {
1759 token = (AddressToken)tokens.get(currentToken++);
1760 }
1761 return token;
1762 }
1763
1764
1765 /**
1766 * Get the next token at the cursor position, without advancing the
1767 * position.
1768 *
1769 * @return The token at the current token position.
1770 */
1771 public AddressToken currentToken() {
1772
1773 return (AddressToken)tokens.get(currentToken);
1774 }
1775
1776
1777 /**
1778 * Get the next non-comment token from the string. Comments are ignored, except as personal information
1779 * for very simple address specifications.
1780 *
1781 * @return A token guaranteed not to be a whitespace token.
1782 */
1783 public AddressToken nextRealToken()
1784 {
1785 AddressToken token = nextToken();
1786 if (token.type == COMMENT) {
1787 token = nextToken();
1788 }
1789 return token;
1790 }
1791
1792 /**
1793 * Push a token back on to the queue, making the index of this
1794 * token the current cursor position.
1795 *
1796 * @param token The token to push.
1797 */
1798 public void pushToken(AddressToken token) {
1799
1800 currentToken = tokenIndex(token);
1801 }
1802
1803 /**
1804 * Get the next token after a given token, without advancing the
1805 * token position.
1806 *
1807 * @param token The token we're retrieving a token relative to.
1808 *
1809 * @return The next token in the list.
1810 */
1811 public AddressToken nextToken(AddressToken token) {
1812 return (AddressToken)tokens.get(tokenIndex(token) + 1);
1813 }
1814
1815
1816 /**
1817 * Return the token prior to a given token.
1818 *
1819 * @param token The token used for the index.
1820 *
1821 * @return The token prior to the index token in the list.
1822 */
1823 public AddressToken previousToken(AddressToken token) {
1824 return (AddressToken)tokens.get(tokenIndex(token) - 1);
1825 }
1826
1827
1828 /**
1829 * Retrieve a token at a given index position.
1830 *
1831 * @param index The target index.
1832 */
1833 public AddressToken getToken(int index)
1834 {
1835 return (AddressToken)tokens.get(index);
1836 }
1837
1838
1839 /**
1840 * Retrieve the index of a particular token in the stream.
1841 *
1842 * @param token The target token.
1843 *
1844 * @return The index of the token within the stream. Returns -1 if this
1845 * token is somehow not in the stream.
1846 */
1847 public int tokenIndex(AddressToken token) {
1848 return tokens.indexOf(token);
1849 }
1850
1851
1852 /**
1853 * Extract a new TokenStream running from the start token to the
1854 * token preceeding the end token.
1855 *
1856 * @param start The starting token of the section.
1857 * @param end The last token (+1) for the target section.
1858 *
1859 * @return A new TokenStream object for processing this section of tokens.
1860 */
1861 public TokenStream section(AddressToken start, AddressToken end) {
1862 int startIndex = tokenIndex(start);
1863 int endIndex = tokenIndex(end);
1864
1865
1866
1867
1868 ArrayList list = new ArrayList(endIndex - startIndex + 2);
1869
1870 for (int i = startIndex; i <= endIndex; i++) {
1871 list.add(tokens.get(i));
1872 }
1873 return new TokenStream(list);
1874 }
1875
1876
1877 /**
1878 * Reset the token position back to the beginning of the
1879 * stream.
1880 */
1881 public void reset() {
1882 currentToken = 0;
1883 }
1884
1885 /**
1886 * Scan forward looking for a non-blank token.
1887 *
1888 * @return The first non-blank token in the stream.
1889 */
1890 public AddressToken getNonBlank()
1891 {
1892 AddressToken token = currentToken();
1893 while (token.type == WHITESPACE) {
1894 currentToken++;
1895 token = currentToken();
1896 }
1897 return token;
1898 }
1899
1900
1901 /**
1902 * Extract a blank delimited token from a TokenStream. A blank
1903 * delimited token is the set of tokens up to the next real whitespace
1904 * token (comments not included).
1905 *
1906 * @return A TokenStream object with the new set of tokens.
1907 */
1908 public TokenStream getBlankDelimitedToken()
1909 {
1910
1911 AddressToken first = getNonBlank();
1912
1913 if (first.type == END_OF_TOKENS) {
1914 return null;
1915 }
1916
1917 AddressToken last = first;
1918
1919
1920
1921 currentToken++;
1922
1923 AddressToken token = currentToken();
1924 while (true) {
1925
1926 if (token.type == END_OF_TOKENS || token.type == WHITESPACE) {
1927 return section(first, last);
1928 }
1929 last = token;
1930 currentToken++;
1931
1932 token = currentToken();
1933 }
1934 }
1935
1936 /**
1937 * Return the index of the current cursor position.
1938 *
1939 * @return The integer index of the current token.
1940 */
1941 public int currentIndex() {
1942 return currentToken;
1943 }
1944
1945 public void dumpTokens()
1946 {
1947 System.out.println(">>>>>>>>> Start dumping TokenStream tokens");
1948 for (int i = 0; i < tokens.size(); i++) {
1949 System.out.println("-------- Token: " + tokens.get(i));
1950 }
1951
1952 System.out.println("++++++++ cursor position=" + currentToken);
1953 System.out.println(">>>>>>>>> End dumping TokenStream tokens");
1954 }
1955 }
1956
1957
1958 /**
1959 * Simple utility class for representing address tokens.
1960 */
1961 public class AddressToken {
1962
1963
1964 int type;
1965
1966
1967 String value;
1968
1969
1970 int position;
1971
1972 AddressToken(int type, int position)
1973 {
1974 this.type = type;
1975 this.value = null;
1976 this.position = position;
1977 }
1978
1979 AddressToken(String value, int type, int position)
1980 {
1981 this.type = type;
1982 this.value = value;
1983 this.position = position;
1984 }
1985
1986 public String toString()
1987 {
1988 if (type == END_OF_TOKENS) {
1989 return "AddressToken: type=END_OF_TOKENS";
1990 }
1991 if (value == null) {
1992 return "AddressToken: type=" + (char)type;
1993 }
1994 else {
1995 return "AddressToken: type=" + (char)type + " value=" + value;
1996 }
1997 }
1998 }
1999 }
2000