Maven Clover report

Clover coverage report - Maven Clover report

Coverage timestamp: Sun Aug 20 2006 04:01:04 PDT

FRAMES NO FRAMES

file stats:	LOC:	2,000		Methods:	58
	NCLOC:	988		Classes:	3

Source file

Conditionals

Statements

Methods

TOTAL

AddressParser.java

76.1%

86.1%

86.2%

83.6%

1		/**
2		*
3		* Copyright 2003-2004 The Apache Software Foundation
4		*
5		* Licensed under the Apache License, Version 2.0 (the "License");
6		* you may not use this file except in compliance with the License.
7		* You may obtain a copy of the License at
8		*
9		* http://www.apache.org/licenses/LICENSE-2.0
10		*
11		* Unless required by applicable law or agreed to in writing, software
12		* distributed under the License is distributed on an "AS IS" BASIS,
13		* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14		* See the License for the specific language governing permissions and
15		* limitations under the License.
16		*/
17
18		package javax.mail.internet;
19
20		import java.io.UnsupportedEncodingException;
21		import java.lang.reflect.Array;
22		import java.util.ArrayList;
23		import java.util.List;
24
25		class AddressParser {
26
27		// the validation strictness levels, from most lenient to most conformant.
28		static public final int NONSTRICT = 0;
29		static public final int PARSE_HEADER = 1;
30		static public final int STRICT = 2;
31
32		// the mailbox forms distinguished while parsing (UNKNOWN until a trigger token is seen)
33		static protected final int UNKNOWN = 0;
34		static protected final int ROUTE_ADDR = 1;
35		static protected final int GROUP_ADDR = 2;
36		static protected final int SIMPLE_ADDR = 3;
37
38		// constants for token types. Delimiter tokens use the delimiter character itself as the type code.
39		static protected final int END_OF_TOKENS = '\0';
40		static protected final int PERIOD = '.';
41		static protected final int LEFT_ANGLE = '<';
42		static protected final int RIGHT_ANGLE = '>';
43		static protected final int COMMA = ',';
44		static protected final int AT_SIGN = '@';
45		static protected final int SEMICOLON = ';';
46		static protected final int COLON = ':';
47		static protected final int QUOTED_LITERAL = '"';
48		static protected final int DOMAIN_LITERAL = '[';
49		static protected final int COMMENT = '(';
50		static protected final int ATOM = 'A';
51		static protected final int WHITESPACE = ' ';
52
53
54		// the string we're parsing
55		private String addresses;
56		// the current parsing position (an index into addresses)
57		private int position;
58		// the end position of the string (set to addresses.length() by tokenizeAddress)
59		private int end;
60		// the strictness flag (tested against NONSTRICT and PARSE_HEADER in parseSingleAddress)
61		private int validationLevel;
62
63	326	public AddressParser(String addresses, int validation) { // position and end keep their default of 0 here
64	326	this.addresses = addresses; // the raw address text that tokenizeAddress() will scan
65	326	validationLevel = validation; // compared against NONSTRICT / PARSE_HEADER during parsing
66		}
67
68
69		/**
70		* Parse an address list into an array of internet addresses.
71		*
72		* @return An array (possibly empty) containing all of the non-null addresses in the list.
73		* @exception AddressException
74		* Thrown for any validation errors.
75		*/
76	187	public InternetAddress[] parseAddressList() throws AddressException
77		{
78		// get the address as a set of tokens we can process.
79	187	TokenStream tokens = tokenizeAddress();
80
81		// get an array list accumulator.
82	179	ArrayList addressList = new ArrayList();
83
84		// we process sections of the token stream until we run out of tokens.
85	179	while (true) {
86		// parse off a single address. Address lists can have null elements,
87		// so this might return an empty list, in which case nothing gets added
88		// to the address accumulator.
89	199	addressList.addAll(parseSingleAddress(tokens, false));
90		// This token should be either a "," delimiter or a stream terminator. If we're
91		// at the end, time to get out. Any other token simply starts the next pass.
92	189	AddressToken token = tokens.nextToken();
93	189	if (token.type == END_OF_TOKENS) {
94	169	break;
95		}
96		}
97
98	169	return (InternetAddress [])addressList.toArray(new InternetAddress[0]);
99		}
100
101
102		/**
103		* Parse a single internet address. This must be a single address,
104		* not an address list.
105		* @return The one address parsed from the string.
106		* @exception AddressException if no address or more than one address is found; trailing tokens are reported via illegalAddress.
107		*/
108	107	public InternetAddress parseAddress() throws AddressException
109		{
110		// get the address as a set of tokens we can process.
111	107	TokenStream tokens = tokenizeAddress();
112
113		// parse off a single address. The result is a list because an empty
114		// string yields no address and a blank-delimited string parsed with the
115		// non-strict rules can yield several.
116	107	List addressList = parseSingleAddress(tokens, false);
117		// we must get exactly one address back from this.
118	107	if (addressList.isEmpty()) {
119	0	throw new AddressException("Null address", addresses, 0);
120		}
121		// this could be a simple list of blank delimited tokens. Ensure we only got one back.
122	107	if (addressList.size() > 1) {
123	0	throw new AddressException("Illegal Address", addresses, 0);
124		}
125
126		// This token must be a stream terminator, or we have an error.
127	107	AddressToken token = tokens.nextToken();
128	107	if (token.type != END_OF_TOKENS) {
129	0	illegalAddress("Illegal Address", token);
130		}
131
132	107	return (InternetAddress)addressList.get(0);
133		}
134
135
136		/**
137		* Validate an internet address. This must be a single address,
138		* not a list of addresses. The address also must not contain
139		* any personal information to be valid.
140		*
141		* @exception AddressException if no address, more than one address, or personal information is found; trailing tokens are reported via illegalAddress.
142		*/
143	18	public void validateAddress() throws AddressException
144		{
145		// get the address as a set of tokens we can process.
146	18	TokenStream tokens = tokenizeAddress();
147
148		// parse off a single address. The result is a list because an empty
149		// string yields no address and a blank-delimited string parsed with the
150		// non-strict rules can yield several.
151	17	List addressList = parseSingleAddress(tokens, false);
152	11	if (addressList.isEmpty()) {
153	0	throw new AddressException("Null address", addresses, 0);
154		}
155
156		// this could be a simple list of blank delimited tokens. Ensure we only got one back.
157	11	if (addressList.size() > 1) {
158	0	throw new AddressException("Illegal Address", addresses, 0);
159		}
160
161	11	InternetAddress address = (InternetAddress)addressList.get(0);
162
163		// validation occurs on an address that's already been split into personal and address
164		// data, so any personal part makes the address invalid here.
165	11	if (address.personal != null) {
166	0	throw new AddressException("Illegal Address", addresses, 0);
167		}
168		// This token must be a stream terminator, or we have an error.
169	11	AddressToken token = tokens.nextToken();
170	11	if (token.type != END_OF_TOKENS) {
171	0	illegalAddress("Illegal Address", token);
172		}
173		}
174
175
176		/**
177		* Extract the set of addresses from a group Internet specification.
178		*
179		* @return An array containing all of the non-null addresses in the list.
180		* @exception AddressException reported via illegalAddress when the ':' or the closing ';' is missing.
181		*/
182	14	public InternetAddress[] extractGroupList() throws AddressException
183		{
184		// get the address as a set of tokens we can process.
185	14	TokenStream tokens = tokenizeAddress();
186
187		// get an array list accumulator (note: this local shadows the addresses field).
188	14	ArrayList addresses = new ArrayList();
189
190	14	AddressToken token = tokens.nextToken();
191
192		// scan forward to the ':' that starts the group list. If we don't find one,
193		// this is an exception.
194	14	while (token.type != COLON) {
195	14	if (token.type == END_OF_TOKENS) {
196	0	illegalAddress("Missing ':'", token);
197		}
198	14	token = tokens.nextToken();
199		}
200
201		// we process sections of the token stream until we run out of tokens.
202	14	while (true) {
203		// parse off a single group member. Address lists can have null elements,
204		// so this might return an empty list, in which case nothing gets added
205		// to the address accumulator.
206	26	addresses.addAll(parseSingleAddress(tokens, true));
207		// This token should be either a "," delimiter or a group terminator. If we're
208		// at the end, this is an error.
209	26	token = tokens.nextToken();
210	26	if (token.type == SEMICOLON) {
211	14	break;
212		}
213	12	else if (token.type == END_OF_TOKENS) {
214	0	illegalAddress("Missing ';'", token);
215		}
216		}
217
218	14	return (InternetAddress [])addresses.toArray(new InternetAddress[0]);
219		}
220
221
222		/**
223		* Parse out the next address from a stream
224		* of address tokens, returning the InternetAddress object(s) that
225		* represent the address.
226		*
227		* @param tokens The token source for this address.
228		* @param inGroup True when parsing the members of a group: ';' then terminates an address, and ':' or end-of-tokens is an error.
229		* @return A list of the InternetAddress objects constructed for
230		* the next address. The list is empty if this is an "empty"
231		* address in a list.
232		* @exception AddressException
233		*/
234	349	private List parseSingleAddress(TokenStream tokens, boolean inGroup) throws AddressException
235		{
236	349	List parsedAddresses = new ArrayList();
237
238		// index markers for personal information
239	349	AddressToken personalStart = null;
240	349	AddressToken personalEnd = null;
241
242		// and similar bits for the address information.
243	349	AddressToken addressStart = null;
244	349	AddressToken addressEnd = null;
245
246		// there is a fall-back set of rules allowed that will parse the address as a set of blank delimited
247		// tokens. However, we do NOT allow this if we encounter any tokens that fall outside of these
248		// rules. For example, comment fields and quoted strings will disallow the very lenient rule set.
249	349	boolean nonStrictRules = true;
250
251		// we don't know the type of address yet
252	349	int addressType = UNKNOWN;
253
254		// the parsing goes in two stages. Stage one runs through the tokens locating the bounds
255		// of the address we're working on, resolving the personal information, and also validating
256		// some of the larger scale syntax features of an address (matched delimiters for routes and
257		// groups, invalid nesting checks, etc.).
258
259		// get the next token from the queue and save this. We're going to scan ahead a bit to
260		// figure out what type of address we're looking at, then reset to do the actual parsing
261		// once we've figured out a form.
262	349	AddressToken first = tokens.nextToken();
263		// push it back on before starting processing.
264	349	tokens.pushToken(first);
265
266		// scan ahead for a trigger token that tells us what we've got.
267	349	while (addressType == UNKNOWN) {
268
269	1458	AddressToken token = tokens.nextToken();
270	1458	switch (token.type) {
271		// skip these for now...after we've processed everything and found that this is a simple
272		// address form, then we'll check for a leading comment token in the first position and use
273		// it as personal information.
274	53	case COMMENT:
275		// comments do, however, denote that this must be parsed according to RFC822 rules.
276	53	nonStrictRules = false;
277	53	break;
278
279		// a semi-colon when processing a group is an address terminator. we need to
280		// process this like a comma then
281	14	case SEMICOLON:
282	14	if (inGroup) {
283		// we need to push the terminator back on for the caller to see.
284	14	tokens.pushToken(token);
285		// we've not tagged any tokens as being the address beginning, so this must be a
286		// null address.
287	14	if (addressStart == null) {
288		// just return the empty list from this.
289	2	return parsedAddresses;
290		}
291		// the end token is the back part.
292	12	addressEnd = tokens.previousToken(token);
293		// without a '<' for a route addr, we can't distinguish address tokens from personal data.
294		// We'll use a leading comment, if there is one.
295	12	personalStart = null;
296		// this is just a simple form.
297	12	addressType = SIMPLE_ADDR;
298	12	break;
299		}
300
301		// NOTE: The above falls through if this is not a group.
302
303		// any of these tokens are a real token that can be the start of an address. Many of
304		// them are not valid as first tokens in this context, but we flag them later if validation
305		// has been requested. For now, we just mark these as the potential address start.
306	25	case DOMAIN_LITERAL:
307	42	case QUOTED_LITERAL:
308		// this set of tokens require fuller RFC822 parsing, so turn off the flag.
309	67	nonStrictRules = false;
310
311	615	case ATOM:
312	169	case AT_SIGN:
313	205	case PERIOD:
314		// if we've not determined the start of the address yet, then check to see if we
315		// need to consider this the personal start.
316	1056	if (addressStart == null) {
317	275	if (personalStart == null) {
318	275	personalStart = token;
319		}
320		// This is the first real token of the address, which at this point can
321		// be either the personal info or the first token of the address. If we hit
322		// an address terminator without encountering either a route trigger or group
323		// trigger, then this is the real address.
324	275	addressStart = token;
325		}
326	1056	break;
327
328		// a LEFT_ANGLE indicates we have a full RFC822 mailbox form. The leading phrase
329		// is the personal info. The address is inside the brackets.
330	102	case LEFT_ANGLE:
331		// a route address automatically switches off the blank-delimited token mode.
332	102	nonStrictRules = false;
333		// this is a route address
334	102	addressType = ROUTE_ADDR;
335		// the address is placed in the InternetAddress object without the route
336		// brackets, so our start is one past this.
337	102	addressStart = tokens.nextRealToken();
338		// push this back on the queue so the scanner picks it up properly.
339	102	tokens.pushToken(addressStart);
340		// make sure we flag the end of the personal section too.
341	102	if (personalStart != null) {
342	45	personalEnd = tokens.previousToken(token);
343		}
344		// scan the rest of a route address.
345	102	addressEnd = scanRouteAddress(tokens, false);
346	100	break;
347
348		// a COLON indicates this is a group specifier...parse the group.
349	35	case COLON:
350		// Colons would not be valid in simple lists, so turn it off.
351	35	nonStrictRules = false;
352		// if we're scanning a group, we shouldn't encounter a ":". This is a
353		// recursion error if found.
354	35	if (inGroup) {
355	0	illegalAddress("Nested group element", token);
356		}
357	35	addressType = GROUP_ADDR;
358		// groups don't have any personal sections.
359	35	personalStart = null;
360		// our real start was back at the beginning
361	35	addressStart = first;
362	35	addressEnd = scanGroupAddress(tokens);
363	32	break;
364
365		// (a semi-colon acts the same as a comma when processing a group; that case is handled above.)
366
367
368		// reached the end of string...this might be a null address, or one of the very simple name
369		// forms used for non-strict RFC822 versions. Reset, and try that form
370	177	case END_OF_TOKENS:
371		// if we're scanning a group, we shouldn't encounter an end token. This is an
372		// error if found.
373	177	if (inGroup) {
374	0	illegalAddress("Missing ';'", token);
375		}
376
377		// NOTE: END_OF_TOKENS falls through into the COMMA handling below.
378
379		// this is either a terminator for an address list or a group terminator.
380	21	case COMMA:
381		// we need to push the terminator back on for the caller to see.
382	198	tokens.pushToken(token);
383		// we've not tagged any tokens as being the address beginning, so this must be a
384		// null address.
385	198	if (addressStart == null) {
386		// just return the empty list from this.
387	14	return parsedAddresses;
388		}
389		// the end token is the back part.
390	184	addressEnd = tokens.previousToken(token);
391		// without a '<' for a route addr, we can't distinguish address tokens from personal data.
392		// We'll use a leading comment, if there is one.
393	184	personalStart = null;
394		// this is just a simple form.
395	184	addressType = SIMPLE_ADDR;
396	184	break;
397
398		// right angle tokens are pushed, because parsing of the bracketing is not necessarily simple.
399		// we need to flag these here.
400	0	case RIGHT_ANGLE:
401	0	illegalAddress("Unexpected '>'", token);
402
403		}
404		}
405
406	328	String personal = null;
407
408		// if we have personal data, then convert it to a string value.
409	328	if (personalStart != null) {
410	44	TokenStream personalTokens = tokens.section(personalStart, personalEnd);
411	44	personal = personalToString(personalTokens);
412		}
413		// if we have a simple address, then check the first token to see if it's a comment. For simple addresses,
414		// we'll accept the first comment token as the personal information.
415		else {
416	284	if (addressType == SIMPLE_ADDR && first.type == COMMENT) {
417	19	personal = first.value;
418		}
419		}
420
421	328	TokenStream addressTokens = tokens.section(addressStart, addressEnd);
422
423		// validation of the located address is skipped only at the PARSE_HEADER level; at every other
424		// level the validator matching the address type (group, route or simple) is run over the
425		// address tokens.
426	328	if (validationLevel != PARSE_HEADER) {
427	308	switch (addressType) {
428	31	case GROUP_ADDR:
429	31	validateGroup(addressTokens);
430	30	break;
431
432	91	case ROUTE_ADDR:
433	91	validateRouteAddr(addressTokens, false);
434	90	break;
435
436	186	case SIMPLE_ADDR:
437		// this is a conditional validation
438	186	validateSimpleAddress(addressTokens);
439	177	break;
440		}
441		}
442
443		// more complex addresses and addresses containing tokens other than just simple addresses
444		// need proper handling.
445	317	if (validationLevel != NONSTRICT \|\| addressType != SIMPLE_ADDR \|\| !nonStrictRules) {
446		// we might have traversed this already when we validated, so reset the
447		// position before using this again.
448	302	addressTokens.reset();
449	302	String address = addressToString(addressTokens);
450
451		// get the parsed out sections as string values.
452	302	InternetAddress result = new InternetAddress();
453	302	result.setAddress(address);
454	302	try {
455	302	result.setPersonal(personal);
456		} catch (UnsupportedEncodingException e) { // NOTE(review): silently ignored; the address is still returned - confirm this best-effort is intended
457		}
458		// even though we have a single address, we return this as a list. Simple addresses
459		// can produce a list of items, so we need to return everything.
460	302	parsedAddresses.add(result);
461	302	return parsedAddresses;
462		}
463		else {
464	15	addressTokens.reset();
465
466	15	TokenStream nextAddress = addressTokens.getBlankDelimitedToken();
467	15	while (nextAddress != null) {
468	15	String address = addressToString(nextAddress);
469		// get the parsed out sections as string values (no personal information is attached in this form).
470	15	InternetAddress result = new InternetAddress();
471	15	result.setAddress(address);
472	15	parsedAddresses.add(result);
473	15	nextAddress = addressTokens.getBlankDelimitedToken();
474		}
475	15	return parsedAddresses;
476		}
477		}
478
479
480		/**
481		* Scan the token stream, parsing off a route addr spec. This
482		* will do some basic syntax validation, but will not actually
483		* validate any of the address information. Comments will be
484		* discarded.
485		*
486		* @param tokens The stream of tokens.
487		* @param inGroup True when scanning inside a group, where the '>' must be followed by ',' or ';' rather than ',' or the end.
488		* @return The last token of the route address (the one preceding the
489		* terminating '>').
490		*/
491	114	private AddressToken scanRouteAddress(TokenStream tokens, boolean inGroup) throws AddressException {
492		// get the first token and ensure we have something between the "<" and ">".
493	114	AddressToken token = tokens.nextRealToken();
494		// the last processed non-whitespace token, which is the actual address end once the
495		// right angle bracket is encountered.
496
497	114	AddressToken previous = null;
498
499		// if this route-addr has route information, the first token after the '<' must be a '@'.
500		// this determines if/where a colon or comma can appear.
501	114	boolean inRoute = token.type == AT_SIGN;
502
503		// now scan until we reach the terminator. The only validation is done on illegal characters.
504	114	while (true) {
505	927	switch (token.type) {
506		// The following tokens are all valid between the brackets, so just skip over them.
507	437	case ATOM:
508	4	case QUOTED_LITERAL:
509	7	case DOMAIN_LITERAL:
510	191	case PERIOD:
511	143	case AT_SIGN:
512	782	break;
513
514	22	case COLON:
515		// if not processing route information, this is illegal.
516	22	if (!inRoute) {
517	0	illegalAddress("Unexpected ':'", token);
518		}
519		// this is the end of the route information, the rules now change.
520	22	inRoute = false;
521	22	break;
522
523	10	case COMMA:
524		// if not processing route information, this is illegal.
525	10	if (!inRoute) {
526	1	illegalAddress("Unexpected ','", token);
527		}
528	9	break;
529
530	111	case RIGHT_ANGLE:
531		// if previous is null, we've had a route address which is "<>". That's illegal.
532	111	if (previous == null) {
533	0	illegalAddress("Illegal address", token);
534		}
535		// step to the next token..this had better be either a comma for another address or
536		// the very end of the address list .
537	111	token = tokens.nextRealToken();
538		// if we're scanning part of a group, then the allowed terminators are either ',' or ';'.
539	111	if (inGroup) {
540	11	if (token.type != COMMA && token.type != SEMICOLON) {
541	0	illegalAddress("Illegal address", token);
542		}
543		}
544		// a normal address should have either a ',' for a list or the end.
545		else {
546	100	if (token.type != COMMA && token.type != END_OF_TOKENS) {
547	0	illegalAddress("Illegal address", token);
548		}
549		}
550		// we need to push the termination token back on.
551	111	tokens.pushToken(token);
552		// return the previous token as the updated position.
553	111	return previous;
554
555	1	case END_OF_TOKENS:
556	1	illegalAddress("Missing '>'", token);
557
558		// now for the illegal ones in this context. NOTE(review): there are no breaks between these cases; presumably illegalAddress always throws - confirm.
559	0	case SEMICOLON:
560	0	illegalAddress("Unexpected ';'", token);
561
562	1	case LEFT_ANGLE:
563	1	illegalAddress("Unexpected '<'", token);
564		}
565		// remember the previous token.
566	813	previous = token;
567	813	token = tokens.nextRealToken();
568		}
569		}
570
571
572		/**
573		* Scan the token stream, parsing off a group address. This
574		* will do some basic syntax validation, but will not actually
575		* validate any of the address information. Comments will be
576		* ignored.
577		*
578		* @param tokens The stream of tokens.
579		*
580		* @return The last token of the group address (the terminating ';').
581		*/
582	35	private AddressToken scanGroupAddress(TokenStream tokens) throws AddressException {
583		// A group does not require that there be anything between the ':' and ';'. This is
584		// just a group with an empty list.
585	35	AddressToken token = tokens.nextRealToken();
586
587		// now scan until we reach the terminator. The only validation is done on illegal characters.
588	35	while (true) {
589	199	switch (token.type) {
590		// The following tokens are all valid in group addresses, so just skip over them.
591	83	case ATOM:
592	0	case QUOTED_LITERAL:
593	0	case DOMAIN_LITERAL:
594	25	case PERIOD:
595	25	case AT_SIGN:
596	20	case COMMA:
597	153	break;
598
599	1	case COLON:
600	1	illegalAddress("Nested group", token);
601
602		// route address within a group specifier....we need to at least verify the bracket nesting
603		// and higher level syntax of the route.
604	12	case LEFT_ANGLE:
605	12	scanRouteAddress(tokens, true);
606	11	break;
607
608		// the only allowed terminator is the ';'
609	1	case END_OF_TOKENS:
610	1	illegalAddress("Missing ';'", token);
611
612		// the ';' closes the group.
613	32	case SEMICOLON:
614		// verify there's nothing illegal after this; the offending token is the one that follows the ';', so that is the one reported.
615	32	AddressToken next = tokens.nextRealToken();
616	32	if (next.type != COMMA && next.type != END_OF_TOKENS) {
617	0	illegalAddress("Illegal address", next);
618		}
619		// don't forget to put this back on...our caller will need it.
620	32	tokens.pushToken(next);
621	32	return token;
622
623	0	case RIGHT_ANGLE:
624	0	illegalAddress("Unexpected '>'", token);
625		}
626	164	token = tokens.nextRealToken();
627		}
628		}
629
630
631		/**
632		* Parse the provided internet address into a set of tokens. This
633		* phase only does a syntax check on the tokens. The interpretation
634		* of the tokens is the next phase.
635		*
636		* @exception AddressException
637		*/
638	326	private TokenStream tokenizeAddress() throws AddressException {
639		// NOTE(review): position is not reset here, so a second call on the same parser would start at the old position - presumably one parser per parse; confirm.
640		// get a list for the set of tokens
641	326	TokenStream tokens = new TokenStream();
642
643	326	end = addresses.length(); // our parsing end marker
644
645		// now scan along the string looking for the special characters in an internet address.
646	326	while (moreCharacters()) {
647	2624	char ch = currentChar();
648
649	2624	switch (ch) {
650		// start of a comment bit...ignore everything until we hit a closing paren.
651	55	case '(':
652	55	scanComment(tokens);
653	53	break;
654		// a closing paren found outside of normal processing.
655	0	case ')':
656	0	syntaxError("Unexpected ')'", position);
657		// NOTE(review): no break above - relies on syntaxError() not returning (presumably it throws); confirm.
658
659		// start of a quoted string
660	48	case '"':
661	48	scanQuotedLiteral(tokens);
662	46	break;
663		// domain literal
664	37	case '[':
665	37	scanDomainLiteral(tokens);
666	32	break;
667
668		// a naked closing bracket...not valid except as part of a domain literal.
669	0	case ']':
670	0	syntaxError("Unexpected ']'", position);
671
672		// special character delimiters
673	119	case '<':
674	119	tokens.addToken(new AddressToken(LEFT_ANGLE, position));
675	119	nextChar();
676	119	break;
677
678		// a naked closing bracket...not valid without a starting one, but
679		// we need to handle this in context.
680	112	case '>':
681	112	tokens.addToken(new AddressToken(RIGHT_ANGLE, position));
682	112	nextChar();
683	112	break;
684	72	case ':':
685	72	tokens.addToken(new AddressToken(COLON, position));
686	72	nextChar();
687	72	break;
688	62	case ',':
689	62	tokens.addToken(new AddressToken(COMMA, position));
690	62	nextChar();
691	62	break;
692	424	case '.':
693	424	tokens.addToken(new AddressToken(PERIOD, position));
694	424	nextChar();
695	424	break;
696	49	case ';':
697	49	tokens.addToken(new AddressToken(SEMICOLON, position));
698	49	nextChar();
699	49	break;
700	345	case '@':
701	345	tokens.addToken(new AddressToken(AT_SIGN, position));
702	345	nextChar();
703	345	break;
704
705		// white space characters. These are mostly token delimiters, but there are some relaxed
706		// situations where they get processed, so we need to add a white space token for the first
707		// one we encounter in a span.
708	138	case ' ':
709	0	case '\t':
710	0	case '\r':
711	0	case '\n':
712		// add a single white space token
713	138	tokens.addToken(new AddressToken(WHITESPACE, position));
714
715	138	nextChar();
716		// step over any space characters, leaving us positioned either at the end
717		// or at the first non-whitespace character.
718	138	while (moreCharacters()) {
719	138	char nextChar = currentChar();
720	138	if (nextChar == ' ' \|\| nextChar == '\t' \|\| nextChar == '\r' \|\| nextChar == '\n') {
721	0	nextChar();
722		}
723		else {
724	138	break;
725		}
726		}
727	138	break;
728
729		// potentially an atom...anything below octal 040 (space) or at/above octal 0177 (DEL) is
730		// rejected as an illegal character, otherwise we parse out the token.
731	1163	default:
732	1163	if (ch < 040 \|\| ch >= 0177) {
733	0	syntaxError("Illegal character in address", position);
734		}
735
736	1163	scanAtom(tokens);
737	1163	break;
738		}
739		}
740
741		// for this end marker, give an end position.
742	317	tokens.addToken(new AddressToken(END_OF_TOKENS, addresses.length()));
743	317	return tokens;
744		}
745
746
747		/**
748		* Step to the next character position while parsing (no bounds check is made here).
749		*/
750	7401	private void nextChar() {
751	7401	position++;
752		}
753
754
755		/**
756		* Retrieve the character at the current parsing position.
757		* No range check is done here; charAt() fails if the position is past the end of the string.
758		* @return The current character.
759		*/
760	8556	private char currentChar() {
761	8556	return addresses.charAt(position);
762		}
763
764		/**
765		* Test if there are more characters left to parse.
766		*
767		* @return True if the current position is still before the end marker, false once the end has been reached.
768		*/
769	9028	private boolean moreCharacters() {
770	9028	return position < end;
771		}
772
773
774		/**
775		* Parse a quoted string as specified by the RFC822 specification.
776		* The token value omits the enclosing quotes and the backslash of any escaped character.
777		* @param tokens The TokenStream where the parsed out token is added.
778		*/
779	48	private void scanQuotedLiteral(TokenStream tokens) throws AddressException {
780	48	StringBuffer value = new StringBuffer();
781
782		// save the start position for the token.
783	48	int startPosition = position;
784		// step over the quote delimiter.
785	48	nextChar();
786
787	48	while (moreCharacters()) {
788	292	char ch = currentChar();
789
790		// is this an escape char?
791	292	if (ch == '\\') {
792		// step past this, and grab the following character
793	13	nextChar();
794	13	if (!moreCharacters()) {
795	0	syntaxError("Missing '\"'", position);
796		}
797	13	value.append(currentChar());
798		}
799		// end of the string?
800	279	else if (ch == '"') {
801		// add the constructed string, tagged with the position of the opening quote like the other scanners do.
802	46	tokens.addToken(new AddressToken(value.toString(), QUOTED_LITERAL, startPosition));
803		// step over the close delimiter for the benefit of the next token.
804	46	nextChar();
805	46	return;
806		}
807		// the RFC822 spec disallows CR characters.
808	233	else if (ch == '\r') {
809	1	syntaxError("Illegal line end in literal", position);
810		}
811		else
812		{
813	232	value.append(ch);
814		}
815	245	nextChar();
816		}
817		// missing delimiter
818	1	syntaxError("Missing '\"'", position);
819		}
820
821
822		/**
823		* Parse a domain literal as specified by the RFC822 specification.
824		* The token value omits the enclosing brackets but keeps any '\' escapes intact.
825		* @param tokens The TokenStream where the parsed out token is added.
826		*/
827	37	private void scanDomainLiteral(TokenStream tokens) throws AddressException {
828	37	StringBuffer value = new StringBuffer();
829
830	37	int startPosition = position;
831		// step over the '[' delimiter.
832	37	nextChar();
833
834	37	while (moreCharacters()) {
835	223	char ch = currentChar();
836
837		// is this an escape char?
838	223	if (ch == '\\') {
839		// because domain literals don't get extra escaping, we render them
840		// with the escaped characters intact. Therefore, append the '\' escape
841		// first, then append the escaped character without examination.
842	3	value.append(currentChar());
843		// step past this, and grab the following character
844	3	nextChar();
845	3	if (!moreCharacters()) {
846	0	syntaxError("Missing ']'", position);
847		}
848	3	value.append(currentChar());
849		}
850		// end of the string?
851	220	else if (ch == ']') {
852		// return the constructed string.
853	32	tokens.addToken(new AddressToken(value.toString(), DOMAIN_LITERAL, startPosition));
854		// step over the close delimiter for the benefit of the next token.
855	32	nextChar();
856	32	return;
857		}
858		// the RFC822 spec says no nesting
859	188	else if (ch == '[') {
860	1	syntaxError("Unexpected '['", position);
861		}
862		// carriage returns are similarly illegal.
863	187	else if (ch == '\r') {
864	1	syntaxError("Illegal line end in domain literal", position);
865		}
866		else
867		{
868	186	value.append(ch);
869		}
870	189	nextChar();
871		}
872		// missing delimiter
873	3	syntaxError("Missing ']'", position);
874		}
875
876		/**
877		* Scan an atom in an internet address, using the RFC822 rules
878		* for atom delimiters.
879		* The first character is taken unconditionally; following ones are taken while isAtom() accepts them.
880		* @param tokens The TokenStream where the parsed out token is added.
881		*/
882	1163	private void scanAtom(TokenStream tokens) throws AddressException {
883	1163	int start = position;
884	1163	nextChar();
885	1163	while (moreCharacters()) {
886
887	4943	char ch = currentChar();
888	4943	if (isAtom(ch)) {
889	3933	nextChar();
890		}
891		else {
892	1010	break;
893		}
894		}
895
896		// add the scanned part of the string as an ATOM token positioned at its first character.
897	1163	tokens.addToken(new AddressToken(addresses.substring(start, position), ATOM, start));
898		}
899
900
901		/**
902		* Parse an internet address comment field as specified by
903		* RFC822. Includes support for quoted characters and nesting.
904		* The token value omits the outermost parentheses and the backslash of any escaped character.
905		* @param tokens The TokenStream where the parsed out token is added.
906		*/
907	55	private void scanComment(TokenStream tokens) throws AddressException {
908	55	StringBuffer value = new StringBuffer();
909
910	55	int startPosition = position;
911		// step past the start character
912	55	nextChar();
913
914		// we're at the top nesting level on the comment.
915	55	int nest = 1;
916
917		// scan while we have more characters.
918	55	while (moreCharacters()) {
919	315	char ch = currentChar();
920		// escape character?
921	315	if (ch == '\\') {
922		// step over this...if escaped, we must have at least one more character
923		// in the string.
924	2	nextChar();
925	2	if (!moreCharacters()) {
926	0	syntaxError("Missing ')'", position);
927		}
928	2	value.append(currentChar());
929		}
930		// nested comment?
931	313	else if (ch == '(') {
932		// step the nesting level...we treat the comment as a single unit, with the delimiters
933		// for the nested comments embedded in the middle
934	1	nest++;
935	1	value.append(ch);
936		}
937		// is this the comment close?
938	312	else if (ch == ')') {
939		// reduce the nesting level. If we still have more to process, add the delimiter character
940		// and keep going.
941	54	nest--;
942	54	if (nest > 0) {
943	1	value.append(ch);
944		}
945		else {
946		// step past this and return. The outermost comment delimiter is not included in
947		// the string value, since this is frequently used as personal data on the
948		// InternetAddress objects.
949	53	nextChar();
950	53	tokens.addToken(new AddressToken(value.toString(), COMMENT, startPosition));
951	53	return;
952		}
953		}
954	258	else if (ch == '\r') { // carriage returns are rejected inside a comment
955	1	syntaxError("Illegal line end in comment", position);
956		}
957		else {
958	257	value.append(ch);
959		}
960		// step to the next character.
961	261	nextChar();
962		}
963		// ran out of data before seeing the closing bit, not good
964	1	syntaxError("Missing ')'", position);
965		}
966
967
968		/**
969		* Validate the syntax of an RFC822 group internet address specification.
970		*
971		* @param tokens The stream of tokens for the address.
972		*
973		* @exception AddressException
974		*/
975	31	private void validateGroup(TokenStream tokens) throws AddressException {
976		// we know already this is an address in the form "phrase:group;". Now we need to validate the
977		// elements.
978
979	31	int phraseCount = 0;
980
981	31	AddressToken token = tokens.nextRealToken();
982		// now scan to the semi color, ensuring we have only word or comment tokens.
983	31	while (token.type != COLON) {
984		// only these tokens are allowed here.
985	40	if (token.type != ATOM && token.type != QUOTED_LITERAL) {
986	0	invalidToken(token);
987		}
988	40	phraseCount++;
989	40	token = tokens.nextRealToken();
990		}
991
992
993		// RFC822 groups require a leading phrase in group specifiers.
994	31	if (phraseCount == 0) {
995	1	illegalAddress("Missing group identifier phrase", token);
996		}
997
998		// now we do the remainder of the parsing using the initial phrase list as the sink...the entire
999		// address will be converted to a string later.
1000
1001		// ok, we only know this has been valid up to the ":", now we have some real checks to perform.
1002	30	while (true) {
1003		// go scan off a mailbox. if everything goes according to plan, we should be positioned at either
1004		// a comma or a semicolon.
1005	48	validateGroupMailbox(tokens);
1006
1007	48	token = tokens.nextRealToken();
1008
1009		// we're at the end of the group. Make sure this is truely the end.
1010	48	if (token.type == SEMICOLON) {
1011	30	token = tokens.nextRealToken();
1012	30	if (token.type != END_OF_TOKENS) {
1013	0	illegalAddress("Illegal group address", token);
1014		}
1015	30	return;
1016		}
1017
1018		// if not a semicolon, this better be a comma.
1019	18	else if (token.type != COMMA) {
1020	0	illegalAddress("Illegal group address", token);
1021		}
1022		}
1023		}
1024
1025
1026		/**
1027		* Validate the syntax of single mailbox within a group address.
1028		*
1029		* @param tokens The stream of tokens representing the address.
1030		*
1031		* @exception AddressException
1032		*/
1033	48	private void validateGroupMailbox(TokenStream tokens) throws AddressException {
1034	48	AddressToken first = tokens.nextRealToken();
1035		// is this just a null address in the list? then push the terminator back and return.
1036	48	if (first.type == COMMA \|\| first.type == SEMICOLON) {
1037	14	tokens.pushToken(first);
1038	14	return;
1039		}
1040
1041		// now we need to scan ahead to see if we can determine the type.
1042	34	AddressToken token = first;
1043
1044
1045		// we need to scan forward to figure out what sort of address this is.
1046	62	while (first != null) {
1047	62	switch (token.type) {
1048		// until we know the context, these are all just ignored.
1049	0	case QUOTED_LITERAL:
1050	28	case ATOM:
1051	28	break;
1052
1053		// a LEFT_ANGLE indicates we have a full RFC822 mailbox form. The leading phrase
1054		// is the personal info. The address is inside the brackets.
1055	11	case LEFT_ANGLE:
1056	11	tokens.pushToken(first);
1057	11	validatePhrase(tokens, false);
1058	11	validateRouteAddr(tokens, true);
1059	11	return;
1060
1061		// we've hit a period as the first non-word token. This should be part of a local-part
1062		// of an address.
1063	0	case PERIOD:
1064		// we've hit an "@" as the first non-word token. This is probably a simple address in
1065		// the form "user@domain".
1066	21	case AT_SIGN:
1067	21	tokens.pushToken(first);
1068	21	validateAddressSpec(tokens);
1069	21	return;
1070
1071		// reached the end of string...this might be a null address, or one of the very simple name
1072		// forms used for non-strict RFC822 versions. Reset, and try that form
1073	1	case COMMA:
1074		// this is the end of the group...handle it like a comma for now.
1075	1	case SEMICOLON:
1076	2	tokens.pushToken(first);
1077	2	validateAddressSpec(tokens);
1078	2	return;
1079
1080	0	case END_OF_TOKENS:
1081	0	illegalAddress("Missing ';'", token);
1082
1083		}
1084	28	token = tokens.nextRealToken();
1085		}
1086		}
1087
1088
1089		/**
1090		* Utility method for throwing an AddressException caused by an
1091		* unexpected primitive token.
1092		*
1093		* @param token The token causing the problem (must not be a value type token).
1094		*
1095		* @exception AddressException
1096		*/
1097	0	private void invalidToken(AddressToken token) throws AddressException {
1098	0	illegalAddress("Unexpected '" + token.type + "'", token);
1099		}
1100
1101
1102		/**
1103		* Raise an error about illegal syntax.
1104		*
1105		* @param message The message used in the thrown exception.
1106		* @param position The parsing position within the string.
1107		*
1108		* @exception AddressException
1109		*/
1110	9	private void syntaxError(String message, int position) throws AddressException
1111		{
1112	9	throw new AddressException(message, addresses, position);
1113		}
1114
1115
1116		/**
1117		* Throw an exception based on the position of an invalid token.
1118		*
1119		* @param message The exception message.
1120		* @param token The token causing the error. This tokens position is used
1121		* in the exception information.
1122		*/
1123	16	private void illegalAddress(String message, AddressToken token) throws AddressException {
1124	16	throw new AddressException(message, addresses, token.position);
1125		}
1126
1127
1128		/**
1129		* Validate that a required phrase exists.
1130		*
1131		* @param tokens The set of tokens to validate. positioned at the phrase start.
1132		* @param required A flag indicating whether the phrase is optional or required.
1133		*
1134		* @exception AddressException
1135		*/
1136	11	private void validatePhrase(TokenStream tokens, boolean required) throws AddressException {
1137		// we need to have at least one WORD token in the phrase...everything is optional
1138		// after that.
1139	11	AddressToken token = tokens.nextRealToken();
1140	11	if (token.type != ATOM && token.type != QUOTED_LITERAL) {
1141	6	if (required) {
1142	0	illegalAddress("Missing group phrase", token);
1143		}
1144		}
1145
1146		// now scan forward to the end of the phrase
1147	11	token = tokens.nextRealToken();
1148	11	while (token.type == ATOM \|\| token.type == QUOTED_LITERAL) {
1149	6	token = tokens.nextRealToken();
1150		}
1151		}
1152
1153
1154		/**
1155		* validate a routeaddr specification
1156		*
1157		* @param tokens The tokens representing the address portion (personal information
1158		* already removed).
1159		* @param ingroup true indicates we're validating a route address inside a
1160		* group list. false indicates we're validating a standalone
1161		* address.
1162		*
1163		* @exception AddressException
1164		*/
1165	102	private void validateRouteAddr(TokenStream tokens, boolean ingroup) throws AddressException {
1166		// get the next real token.
1167	102	AddressToken token = tokens.nextRealToken();
1168		// if this is an at sign, then we have a list of domains to parse.
1169	102	if (token.type == AT_SIGN) {
1170		// push the marker token back in for the route parser, and step past that part.
1171	20	tokens.pushToken(token);
1172	20	validateRoute(tokens);
1173		}
1174		else {
1175		// we need to push this back on to validate the local part.
1176	82	tokens.pushToken(token);
1177		}
1178
1179		// now we expect to see an address spec.
1180	102	validateAddressSpec(tokens);
1181
1182	101	token = tokens.nextRealToken();
1183	101	if (ingroup) {
1184		// if we're validating within a group specification, the angle brackets are still there (and
1185		// required).
1186	11	if (token.type != RIGHT_ANGLE) {
1187	0	illegalAddress("Missing '>'", token);
1188		}
1189		}
1190		else {
1191		// the angle brackets were removed to make this an address, so we should be done. Make sure we
1192		// have a terminator here.
1193	90	if (token.type != END_OF_TOKENS) {
1194	0	illegalAddress("Illegal Address", token);
1195		}
1196		}
1197		}
1198
1199
1200
1201		/**
1202		* Validate a simple address in the form "user@domain".
1203		*
1204		* @param tokens The stream of tokens representing the address.
1205		*/
1206	186	private void validateSimpleAddress(TokenStream tokens) throws AddressException {
1207
1208		// the validation routines occur after addresses have been split into
1209		// personal and address forms. Therefore, our validation begins directly
1210		// with the first token.
1211	186	validateAddressSpec(tokens);
1212
1213		// get the next token and see if there is something here...anything but the terminator is an error
1214	183	AddressToken token = tokens.nextRealToken();
1215	183	if (token.type != END_OF_TOKENS) {
1216	6	illegalAddress("Illegal Address", token);
1217		}
1218		}
1219
1220		/**
1221		* Validate the addr-spec portion of an address. RFC822 requires
1222		* this be of the form "local-part@domain". However, javamail also
1223		* allows simple address of the form "local-part". We only require
1224		* the domain if an '@' is encountered.
1225		*
1226		* @param tokens
1227		*/
1228	311	private void validateAddressSpec(TokenStream tokens) throws AddressException {
1229		// all addresses, even the simple ones, must have at least a local part.
1230	311	validateLocalPart(tokens);
1231
1232		// now see if we have a domain portion to look at.
1233	307	AddressToken token = tokens.nextRealToken();
1234	307	if (token.type == AT_SIGN) {
1235	268	validateDomain(tokens);
1236		}
1237		else {
1238		// put this back for termination
1239	39	tokens.pushToken(token);
1240		}
1241
1242		}
1243
1244
1245		/**
1246		* Validate the route portion of a route-addr. This is a list
1247		* of domain values in the form 1#("@" domain) ":".
1248		*
1249		* @param tokens The token stream holding the address information.
1250		*/
1251	20	private void validateRoute(TokenStream tokens) throws AddressException {
1252	20	while (true) {
1253	56	AddressToken token = tokens.nextRealToken();
1254		// if this is the first part of the list, go parse off a domain
1255	56	if (token.type == AT_SIGN) {
1256	28	validateDomain(tokens);
1257		}
1258		// another element in the list? Go around again
1259	28	else if (token.type == COMMA) {
1260	8	continue;
1261		}
1262		// the list is terminated by a colon...stop this part of the validation once we hit one.
1263	20	else if (token.type == COLON) {
1264	20	return;
1265		}
1266		// the list is terminated by a colon. If this isn't one of those, we have an error.
1267		else {
1268	0	illegalAddress("Missing ':'", token);
1269		}
1270		}
1271		}
1272
1273
1274		/**
1275		* Parse the local part of an address spec. The local part
1276		* is a series of "words" separated by ".".
1277		*/
1278	311	private void validateLocalPart(TokenStream tokens) throws AddressException {
1279	311	while (true) {
1280		// get the token.
1281	389	AddressToken token = tokens.nextRealToken();
1282
1283		// this must be either an atom or a literal.
1284	389	if (token.type != ATOM && token.type != QUOTED_LITERAL) {
1285	4	illegalAddress("Invalid local part", token);
1286		}
1287
1288		// get the next token (white space and comments ignored)
1289	385	token = tokens.nextRealToken();
1290		// if this is a period, we continue parsing
1291	385	if (token.type != PERIOD) {
1292	307	tokens.pushToken(token);
1293		// return the token
1294	307	return;
1295		}
1296		}
1297		}
1298
1299
1300
1301		/**
1302		* Parse a domain name of the form sub-domain *("." sub-domain).
1303		* a sub-domain is either an atom or a domain-literal.
1304		*/
1305	296	private void validateDomain(TokenStream tokens) throws AddressException {
1306	296	while (true) {
1307		// get the token.
1308	594	AddressToken token = tokens.nextRealToken();
1309
1310		// this must be either an atom or a domain literal.
1311	594	if (token.type != ATOM && token.type != DOMAIN_LITERAL) {
1312	0	illegalAddress("Invalid domain", token);
1313		}
1314
1315		// get the next token (white space is ignored)
1316	594	token = tokens.nextRealToken();
1317		// if this is a period, we continue parsing
1318	594	if (token.type != PERIOD) {
1319		// return the token
1320	296	tokens.pushToken(token);
1321	296	return;
1322		}
1323		}
1324		}
1325
1326		/**
1327		* Convert a list of word tokens into a phrase string. The
1328		* rules for this are a little hard to puzzle out, but there
1329		* is a logic to it. If the list is empty, the phrase is
1330		* just a null value.
1331		*
1332		* If we have a phrase, then the quoted strings need to
1333		* handled appropriately. In multi-token phrases, the
1334		* quoted literals are concatenated with the quotes intact,
1335		* regardless of content. Thus a phrase that comes in like this:
1336		*
1337		* "Geronimo" Apache
1338		*
1339		* gets converted back to the same string.
1340		*
1341		* If there is just a single token in the phrase, AND the token
1342		* is a quoted string AND the string does not contain embedded
1343		* special characters ("\.,@<>()[]:;), then the phrase
1344		* is expressed as an atom. Thus the literal
1345		*
1346		* "Geronimo"
1347		*
1348		* becomes
1349		*
1350		* Geronimo
1351		*
1352		* but
1353		*
1354		* "(Geronimo)"
1355		*
1356		* remains
1357		*
1358		* "(Geronimo)"
1359		*
1360		* Note that we're generating a canonical form of the phrase,
1361		* which removes comments and reduces linear whitespace down
1362		* to a single separator token.
1363		*
1364		* @param phrase An array list of phrase tokens (which may be empty).
1365		*/
1366	44	private String personalToString(TokenStream tokens) {
1367
1368		// no tokens in the stream? This is a null value.
1369	44	AddressToken token = tokens.nextToken();
1370
1371	44	if (token.type == END_OF_TOKENS) {
1372	0	return null;
1373		}
1374
1375	44	AddressToken next = tokens.nextToken();
1376
1377		// single element phrases get special treatment.
1378	44	if (next.type == END_OF_TOKENS) {
1379		// this can be used directly...if it contains special characters, quoting will be
1380		// performed when it's converted to a string value.
1381	27	return token.value;
1382		}
1383
1384		// reset to the beginning
1385	17	tokens.pushToken(token);
1386
1387		// have at least two tokens,
1388	17	StringBuffer buffer = new StringBuffer();
1389
1390		// get the first token. After the first, we add these as blank delimited values.
1391	17	token = tokens.nextToken();
1392	17	addTokenValue(token, buffer);
1393
1394	17	token = tokens.nextToken();
1395	17	while (token.type != END_OF_TOKENS) {
1396		// add a blank separator
1397	18	buffer.append(' ');
1398		// now add the next tokens value
1399	18	addTokenValue(token, buffer);
1400	18	token = tokens.nextToken();
1401		}
1402		// and return the canonicalized value
1403	17	return buffer.toString();
1404		}
1405
1406
1407		/**
1408		* take a canonicalized set of address tokens and reformat it back into a string value,
1409		* inserting whitespace where appropriate.
1410		*
1411		* @param tokens The set of tokens representing the address.
1412		*
1413		* @return The string value of the tokens.
1414		*/
1415	317	private String addressToString(TokenStream tokens) {
1416	317	StringBuffer buffer = new StringBuffer();
1417
1418		// this flag controls whether we insert a blank delimiter between tokens as
1419		// we advance through the list. Blanks are only inserted between consequtive value tokens.
1420		// Initially, this is false, then we flip it to true whenever we add a value token, and
1421		// back to false for any special character token.
1422	317	boolean spaceRequired = false;
1423
1424		// we use nextToken rather than nextRealToken(), since we need to process the comments also.
1425	317	AddressToken token = tokens.nextToken();
1426
1427		// now add each of the tokens
1428	317	while (token.type != END_OF_TOKENS) {
1429	1944	switch (token.type) {
1430		// the word tokens are the only ones where we need to worry about adding
1431		// whitespace delimiters.
1432	1038	case ATOM:
1433	21	case QUOTED_LITERAL:
1434		// was the last token also a word? Insert a blank first.
1435	1059	if (spaceRequired) {
1436	11	buffer.append(' ');
1437		}
1438	1059	addTokenValue(token, buffer);
1439		// let the next iteration know we just added a word to the list.
1440	1059	spaceRequired = true;
1441	1059	break;
1442
1443		// these special characters are just added in. The constants for the character types
1444		// were carefully selected to be the character value in question. This allows us to
1445		// just append the value.
1446	11	case LEFT_ANGLE:
1447	11	case RIGHT_ANGLE:
1448	27	case COMMA:
1449	53	case COLON:
1450	319	case AT_SIGN:
1451	31	case SEMICOLON:
1452	403	case PERIOD:
1453	855	buffer.append((char)token.type);
1454		// no spaces around specials
1455	855	spaceRequired = false;
1456	855	break;
1457
1458		// Domain literals self delimiting...we can just append them and turn off the space flag.
1459	30	case DOMAIN_LITERAL:
1460	30	addTokenValue(token, buffer);
1461	30	spaceRequired = false;
1462	30	break;
1463
1464		// Comments are also self delimitin.
1465	0	case COMMENT:
1466	0	addTokenValue(token, buffer);
1467	0	spaceRequired = false;
1468	0	break;
1469		}
1470	1944	token = tokens.nextToken();
1471		}
1472	317	return buffer.toString();
1473		}
1474
1475
1476		/**
1477		* Append a value token on to a string buffer used to create
1478		* the canonicalized string value.
1479		*
1480		* @param token The token we're adding.
1481		* @param buffer The target string buffer.
1482		*/
1483	1124	private void addTokenValue(AddressToken token, StringBuffer buffer) {
1484		// atom values can be added directly.
1485	1124	if (token.type == ATOM) {
1486	1064	buffer.append(token.value);
1487		}
1488		// a literal value? Add this as a quoted string
1489	60	else if (token.type == QUOTED_LITERAL) {
1490	29	buffer.append(formatQuotedString(token.value));
1491		}
1492		// could be a domain literal of the form "[value]"
1493	31	else if (token.type == DOMAIN_LITERAL) {
1494	30	buffer.append('[');
1495	30	buffer.append(token.value);
1496	30	buffer.append(']');
1497		}
1498		// comments also have values
1499	1	else if (token.type == COMMENT) {
1500	1	buffer.append('(');
1501	1	buffer.append(token.value);
1502	1	buffer.append(')');
1503		}
1504		}
1505
1506
1507
1508		private static final byte[] CHARMAP = {
1509		0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x06, 0x02, 0x06, 0x02, 0x02, 0x06, 0x02, 0x02,
1510		0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
1511		0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00,
1512		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x00, 0x01, 0x00,
1513
1514		0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1515		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x00, 0x00,
1516		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1517		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
1518		};
1519
1520		private static final byte FLG_SPECIAL = 1;
1521		private static final byte FLG_CONTROL = 2;
1522		private static final byte FLG_SPACE = 4;
1523
1524	0	private static boolean isSpace(char ch) {
1525	0	if (ch > '\u007f') {
1526	0	return false;
1527		} else {
1528	0	return (CHARMAP[ch] & FLG_SPACE) != 0;
1529		}
1530		}
1531
1532		/**
1533		* Quick test to see if a character is an allowed atom character
1534		* or not.
1535		*
1536		* @param ch The test character.
1537		*
1538		* @return true if this character is allowed in atoms, false for any
1539		* control characters, special characters, or blanks.
1540		*/
1541	5158	public static boolean isAtom(char ch) {
1542	5158	if (ch > '\u007f') {
1543	0	return false;
1544		}
1545	5158	else if (ch == ' ') {
1546	57	return false;
1547		}
1548		else {
1549	5101	return (CHARMAP[ch] & (FLG_SPECIAL \| FLG_CONTROL)) == 0;
1550		}
1551		}
1552
1553		/**
1554		* Tests one string to determine if it contains any of the
1555		* characters in a supplied test string.
1556		*
1557		* @param s The string we're testing.
1558		* @param chars The set of characters we're testing against.
1559		*
1560		* @return true if any of the characters is found, false otherwise.
1561		*/
1562	173	public static boolean containsCharacters(String s, String chars)
1563		{
1564	173	for (int i = 0; i < s.length(); i++) {
1565	2100	if (chars.indexOf(s.charAt(i)) >= 0) {
1566	62	return true;
1567		}
1568		}
1569	111	return false;
1570		}
1571
1572
1573		/**
1574		* Tests if a string contains any non-special characters that
1575		* would require encoding the value as a quoted string rather
1576		* than a simple atom value.
1577		*
1578		* @param s The test string.
1579		*
1580		* @return True if the string contains only blanks or allowed atom
1581		* characters.
1582		*/
1583	46	public static boolean containsSpecials(String s)
1584		{
1585	46	for (int i = 0; i < s.length(); i++) {
1586	242	char ch = s.charAt(i);
1587		// must be either a blank or an allowed atom char.
1588	242	if (ch == ' ' \|\| isAtom(ch)) {
1589	239	continue;
1590		}
1591		else {
1592	3	return true;
1593		}
1594		}
1595	43	return false;
1596		}
1597
1598
1599		/**
1600		* Tests if a string contains any non-special characters that
1601		* would require encoding the value as a quoted string rather
1602		* than a simple atom value.
1603		*
1604		* @param s The test string.
1605		*
1606		* @return True if the string contains only blanks or allowed atom
1607		* characters.
1608		*/
1609	0	public static boolean isAtom(String s)
1610		{
1611	0	for (int i = 0; i < s.length(); i++) {
1612	0	char ch = s.charAt(i);
1613		// must be an allowed atom character
1614	0	if (!isAtom(ch)) {
1615	0	return false;
1616		}
1617		}
1618	0	return true;
1619		}
1620
1621		/**
1622		* Apply RFC822 quoting rules to a literal string value. This
1623		* will search the string to see if there are any characters that
1624		* require special escaping, and apply the escapes. If the
1625		* string is just a string of blank-delimited atoms, the string
1626		* value is returned without quotes.
1627		*
1628		* @param s The source string.
1629		*
1630		* @return A version of the string as a valid RFC822 quoted literal.
1631		*/
1632	63	public static String quoteString(String s) {
1633
1634		// only backslash and double quote require escaping. If the string does not
1635		// contain any of these, then we can just slap on some quotes and go.
1636	63	if (s.indexOf('\\') == -1 && s.indexOf('"') == -1) {
1637		// if the string is an atom (or a series of blank-delimited atoms), we can just return it directly.
1638	46	if (!containsSpecials(s)) {
1639	43	return s;
1640		}
1641	3	StringBuffer buffer = new StringBuffer(s.length() + 2);
1642	3	buffer.append('"');
1643	3	buffer.append(s);
1644	3	buffer.append('"');
1645	3	return buffer.toString();
1646		}
1647
1648		// get a buffer sufficiently large for the string, two quote characters, and a "reasonable"
1649		// number of escaped values.
1650	17	StringBuffer buffer = new StringBuffer(s.length() + 10);
1651	17	buffer.append('"');
1652
1653		// now check all of the characters.
1654	17	for (int i = 0; i < s.length(); i++) {
1655	143	char ch = s.charAt(i);
1656		// character requiring escaping?
1657	143	if (ch == '\\' \|\| ch == '"') {
1658		// add an extra backslash
1659	26	buffer.append('\\');
1660		}
1661		// and add on the character
1662	143	buffer.append(ch);
1663		}
1664	17	buffer.append('"');
1665	17	return buffer.toString();
1666		}
1667
1668		/**
1669		* Apply RFC822 quoting rules to a literal string value. This
1670		* will search the string to see if there are any characters that
1671		* require special escaping, and apply the escapes. The returned
1672		* value is enclosed in quotes.
1673		*
1674		* @param s The source string.
1675		*
1676		* @return A version of the string as a valid RFC822 quoted literal.
1677		*/
1678	29	public static String formatQuotedString(String s) {
1679		// only backslash and double quote require escaping. If the string does not
1680		// contain any of these, then we can just slap on some quotes and go.
1681	29	if (s.indexOf('\\') == -1 && s.indexOf('"') == -1) {
1682	29	StringBuffer buffer = new StringBuffer(s.length() + 2);
1683	29	buffer.append('"');
1684	29	buffer.append(s);
1685	29	buffer.append('"');
1686	29	return buffer.toString();
1687		}
1688
1689		// get a buffer sufficiently large for the string, two quote characters, and a "reasonable"
1690		// number of escaped values.
1691	0	StringBuffer buffer = new StringBuffer(s.length() + 10);
1692	0	buffer.append('"');
1693
1694		// now check all of the characters.
1695	0	for (int i = 0; i < s.length(); i++) {
1696	0	char ch = s.charAt(i);
1697		// character requiring escaping?
1698	0	if (ch == '\\' \|\| ch == '"') {
1699		// add an extra backslash
1700	0	buffer.append('\\');
1701		}
1702		// and add on the character
1703	0	buffer.append(ch);
1704		}
1705	0	buffer.append('"');
1706	0	return buffer.toString();
1707		}
1708
1709		public class TokenStream {
1710		// the set of tokens in the parsed address list, as determined by RFC822 syntax rules.
1711		private List tokens;
1712
1713		// the current token position
1714		int currentToken = 0;
1715
1716
/**
 * Create an empty TokenStream, to be filled in while tokenizing an
 * address. No terminator token is added here; the creator is
 * responsible for ending the stream with one.
 */
public TokenStream() {
    this.tokens = new ArrayList();
}
1726
1727
1728		/**
1729		* Construct a TokenStream from a list of tokens. A terminator
1730		* token is added to the end.
1731		*
1732		* @param tokens An existing token list.
1733		*/
1734	387	public TokenStream(List tokens) {
1735	387	this.tokens = tokens;
1736	387	tokens.add(new AddressToken(END_OF_TOKENS, -1));
1737		}
1738
1739		/**
1740		* Add an address token to the token list.
1741		*
1742		* @param t The new token to add to the list.
1743		*/
1744	2932	public void addToken(AddressToken token) {
1745	2932	tokens.add(token);
1746		}
1747
1748		/**
1749		* Get the next token at the cursor position, advancing the
1750		* position accordingly.
1751		*
1752		* @return The token at the current token position.
1753		*/
1754	8904	public AddressToken nextToken() {
1755	8904	AddressToken token = (AddressToken)tokens.get(currentToken++);
1756		// we skip over white space tokens when operating in this mode, so
1757		// check the token and iterate until we get a non-white space.
1758	8904	while (token.type == WHITESPACE) {
1759	255	token = (AddressToken)tokens.get(currentToken++);
1760		}
1761	8904	return token;
1762		}
1763
1764
1765		/**
1766		* Get the next token at the cursor position, without advancing the
1767		* position.
1768		*
1769		* @return The token at the current token position.
1770		*/
1771	97	public AddressToken currentToken() {
1772		// return the current token and step the cursor
1773	97	return (AddressToken)tokens.get(currentToken);
1774		}
1775
1776
1777		/**
1778		* Get the next non-comment token from the string. Comments are ignored, except as personal information
1779		* for very simple address specifications.
1780		*
1781		* @return A token guaranteed not to be a whitespace token.
1782		*/
1783	4335	public AddressToken nextRealToken()
1784		{
1785	4335	AddressToken token = nextToken();
1786	4335	if (token.type == COMMENT) {
1787	0	token = nextToken();
1788		}
1789	4335	return token;
1790		}
1791
1792		/**
1793		* Push a token back on to the queue, making the index of this
1794		* token the current cursor position.
1795		*
1796		* @param token The token to push.
1797		*/
1798	1615	public void pushToken(AddressToken token) {
1799		// just reset the cursor to the token's index position.
1800	1615	currentToken = tokenIndex(token);
1801		}
1802
1803		/**
1804		* Get the next token after a given token, without advancing the
1805		* token position.
1806		*
1807		* @param token The token we're retrieving a token relative to.
1808		*
1809		* @return The next token in the list.
1810		*/
1811	0	public AddressToken nextToken(AddressToken token) {
1812	0	return (AddressToken)tokens.get(tokenIndex(token) + 1);
1813		}
1814
1815
1816		/**
1817		* Return the token prior to a given token.
1818		*
1819		* @param token The token used for the index.
1820		*
1821		* @return The token prior to the index token in the list.
1822		*/
1823	241	public AddressToken previousToken(AddressToken token) {
1824	241	return (AddressToken)tokens.get(tokenIndex(token) - 1);
1825		}
1826
1827
1828		/**
1829		* Retrieve a token at a given index position.
1830		*
1831		* @param index The target index.
1832		*/
1833	0	public AddressToken getToken(int index)
1834		{
1835	0	return (AddressToken)tokens.get(index);
1836		}
1837
1838
1839		/**
1840		* Retrieve the index of a particular token in the stream.
1841		*
1842		* @param token The target token.
1843		*
1844		* @return The index of the token within the stream. Returns -1 if this
1845		* token is somehow not in the stream.
1846		*/
1847	2630	public int tokenIndex(AddressToken token) {
1848	2630	return tokens.indexOf(token);
1849		}
1850
1851
1852		/**
1853		* Extract a new TokenStream running from the start token to the
1854		* token preceeding the end token.
1855		*
1856		* @param start The starting token of the section.
1857		* @param end The last token (+1) for the target section.
1858		*
1859		* @return A new TokenStream object for processing this section of tokens.
1860		*/
1861	387	public TokenStream section(AddressToken start, AddressToken end) {
1862	387	int startIndex = tokenIndex(start);
1863	387	int endIndex = tokenIndex(end);
1864
1865		// List.subList() returns a list backed by the original list. Since we need to add a
1866		// terminator token to this list when we take the sublist, we need to manually copy the
1867		// references so we don't end up munging the original list.
1868	387	ArrayList list = new ArrayList(endIndex - startIndex + 2);
1869
1870	387	for (int i = startIndex; i <= endIndex; i++) {
1871	2222	list.add(tokens.get(i));
1872		}
1873	387	return new TokenStream(list);
1874		}
1875
1876
1877		/**
1878		* Reset the token position back to the beginning of the
1879		* stream.
1880		*/
1881	317	public void reset() {
1882	317	currentToken = 0;
1883		}
1884
1885		/**
1886		* Scan forward looking for a non-blank token.
1887		*
1888		* @return The first non-blank token in the stream.
1889		*/
1890	30	public AddressToken getNonBlank()
1891		{
1892	30	AddressToken token = currentToken();
1893	30	while (token.type == WHITESPACE) {
1894	0	currentToken++;
1895	0	token = currentToken();
1896		}
1897	30	return token;
1898		}
1899
1900
1901		/**
1902		* Extract a blank delimited token from a TokenStream. A blank
1903		* delimited token is the set of tokens up to the next real whitespace
1904		* token (comments not included).
1905		*
1906		* @return A TokenStream object with the new set of tokens.
1907		*/
1908	30	public TokenStream getBlankDelimitedToken()
1909		{
1910		// get the next non-whitespace token.
1911	30	AddressToken first = getNonBlank();
1912		// if this is the end, we return null.
1913	30	if (first.type == END_OF_TOKENS) {
1914	15	return null;
1915		}
1916
1917	15	AddressToken last = first;
1918
1919		// the methods for retrieving tokens skip over whitespace, so we're going to process this
1920		// by index.
1921	15	currentToken++;
1922
1923	15	AddressToken token = currentToken();
1924	15	while (true) {
1925		// if this is our marker, then pluck out the section and return it.
1926	67	if (token.type == END_OF_TOKENS \|\| token.type == WHITESPACE) {
1927	15	return section(first, last);
1928		}
1929	52	last = token;
1930	52	currentToken++;
1931		// we accept any and all tokens here.
1932	52	token = currentToken();
1933		}
1934		}
1935
1936		/**
1937		* Return the index of the current cursor position.
1938		*
1939		* @return The integer index of the current token.
1940		*/
1941	0	public int currentIndex() {
1942	0	return currentToken;
1943		}
1944
1945	0	public void dumpTokens()
1946		{
1947	0	System.out.println(">>>>>>>>> Start dumping TokenStream tokens");
1948	0	for (int i = 0; i < tokens.size(); i++) {
1949	0	System.out.println("-------- Token: " + tokens.get(i));
1950		}
1951
1952	0	System.out.println("++++++++ cursor position=" + currentToken);
1953	0	System.out.println(">>>>>>>>> End dumping TokenStream tokens");
1954		}
1955		}
1956
1957
1958		/**
1959		* Simple utility class for representing address tokens.
1960		*/
1961		public class AddressToken {
1962
1963		// the token type
1964		int type;
1965
1966		// string value of the token (can be null)
1967		String value;
1968
1969		// position of the token within the address string.
1970		int position;
1971
1972	2025	AddressToken(int type, int position)
1973		{
1974	2025	this.type = type;
1975	2025	this.value = null;
1976	2025	this.position = position;
1977		}
1978
1979	1294	AddressToken(String value, int type, int position)
1980		{
1981	1294	this.type = type;
1982	1294	this.value = value;
1983	1294	this.position = position;
1984		}
1985
1986	0	public String toString()
1987		{
1988	0	if (type == END_OF_TOKENS) {
1989	0	return "AddressToken: type=END_OF_TOKENS";
1990		}
1991	0	if (value == null) {
1992	0	return "AddressToken: type=" + (char)type;
1993		}
1994		else {
1995	0	return "AddressToken: type=" + (char)type + " value=" + value;
1996		}
1997		}
1998		}
1999		}
2000