Clover coverage report - Maven Clover report
Coverage timestamp: Sun Aug 20 2006 04:01:04 PDT
file stats: LOC: 2,000   Methods: 58
NCLOC: 988   Classes: 3
 
 Source file Conditionals Statements Methods TOTAL
AddressParser.java 76.1% 86.1% 86.2% 83.6%
coverage coverage
 1    /**
 2    *
 3    * Copyright 2003-2004 The Apache Software Foundation
 4    *
 5    * Licensed under the Apache License, Version 2.0 (the "License");
 6    * you may not use this file except in compliance with the License.
 7    * You may obtain a copy of the License at
 8    *
 9    * http://www.apache.org/licenses/LICENSE-2.0
 10    *
 11    * Unless required by applicable law or agreed to in writing, software
 12    * distributed under the License is distributed on an "AS IS" BASIS,
 13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14    * See the License for the specific language governing permissions and
 15    * limitations under the License.
 16    */
 17   
 18    package javax.mail.internet;
 19   
 20    import java.io.UnsupportedEncodingException;
 21    import java.lang.reflect.Array;
 22    import java.util.ArrayList;
 23    import java.util.List;
 24   
 25    class AddressParser {
 26   
 27    // the validation strictness levels, from most lenient to most conformant.
 28    static public final int NONSTRICT = 0;
 29    static public final int PARSE_HEADER = 1;
 30    static public final int STRICT = 2;
 31   
 32    // different mailbox types
 33    static protected final int UNKNOWN = 0;
 34    static protected final int ROUTE_ADDR = 1;
 35    static protected final int GROUP_ADDR = 2;
 36    static protected final int SIMPLE_ADDR = 3;
 37   
 38    // constants for token types.
 39    static protected final int END_OF_TOKENS = '\0';
 40    static protected final int PERIOD = '.';
 41    static protected final int LEFT_ANGLE = '<';
 42    static protected final int RIGHT_ANGLE = '>';
 43    static protected final int COMMA = ',';
 44    static protected final int AT_SIGN = '@';
 45    static protected final int SEMICOLON = ';';
 46    static protected final int COLON = ':';
 47    static protected final int QUOTED_LITERAL = '"';
 48    static protected final int DOMAIN_LITERAL = '[';
 49    static protected final int COMMENT = '(';
 50    static protected final int ATOM = 'A';
 51    static protected final int WHITESPACE = ' ';
 52   
 53   
 54    // the string we're parsing
 55    private String addresses;
 56    // the current parsing position
 57    private int position;
 58    // the end position of the string
 59    private int end;
 60    // the strictness flag
 61    private int validationLevel;
 62   
 63  326 public AddressParser(String addresses, int validation) {
 64  326 this.addresses = addresses;
 65  326 validationLevel = validation;
 66    }
 67   
 68   
 69    /**
 70    * Parse an address list into an array of internet addresses.
 71    *
 72    * @return An array containing all of the non-null addresses in the list.
 73    * @exception AddressException
 74    * Thrown for any validation errors.
 75    */
 76  187 public InternetAddress[] parseAddressList() throws AddressException
 77    {
 78    // get the address as a set of tokens we can process.
 79  187 TokenStream tokens = tokenizeAddress();
 80   
 81    // get an array list accumulator.
 82  179 ArrayList addressList = new ArrayList();
 83   
 84    // we process sections of the token stream until we run out of tokens.
 85  179 while (true) {
 86    // parse off a single address. Address lists can have null elements,
 87    // so this might return a null value. The null value does not get added
 88    // to the address accumulator.
 89  199 addressList.addAll(parseSingleAddress(tokens, false));
 90    // This token should be either a "," delimiter or a stream terminator. If we're
 91    // at the end, time to get out.
 92  189 AddressToken token = tokens.nextToken();
 93  189 if (token.type == END_OF_TOKENS) {
 94  169 break;
 95    }
 96    }
 97   
 98  169 return (InternetAddress [])addressList.toArray(new InternetAddress[0]);
 99    }
 100   
 101   
 102    /**
 103    * Parse a single internet address. This must be a single address,
 104    * not an address list.
 105    *
 106    * @exception AddressException
 107    */
 108  107 public InternetAddress parseAddress() throws AddressException
 109    {
 110    // get the address as a set of tokens we can process.
 111  107 TokenStream tokens = tokenizeAddress();
 112   
 113    // parse off a single address. Address lists can have null elements,
 114    // so this might return a null value. The null value does not get added
 115    // to the address accumulator.
 116  107 List addressList = parseSingleAddress(tokens, false);
 117    // we must get exactly one address back from this.
 118  107 if (addressList.isEmpty()) {
 119  0 throw new AddressException("Null address", addresses, 0);
 120    }
 121    // this could be a simple list of blank delimited tokens. Ensure we only got one back.
 122  107 if (addressList.size() > 1) {
 123  0 throw new AddressException("Illegal Address", addresses, 0);
 124    }
 125   
 126    // This token must be a stream terminator, or we have an error.
 127  107 AddressToken token = tokens.nextToken();
 128  107 if (token.type != END_OF_TOKENS) {
 129  0 illegalAddress("Illegal Address", token);
 130    }
 131   
 132  107 return (InternetAddress)addressList.get(0);
 133    }
 134   
 135   
 136    /**
 137    * Validate an internet address. This must be a single address,
 138    * not a list of addresses. The address also must not contain
 139    * any personal information to be valid.
 140    *
 141    * @exception AddressException
 142    */
 143  18 public void validateAddress() throws AddressException
 144    {
 145    // get the address as a set of tokens we can process.
 146  18 TokenStream tokens = tokenizeAddress();
 147   
 148    // parse off a single address. Address lists can have null elements,
 149    // so this might return a null value. The null value does not get added
 150    // to the address accumulator.
 151  17 List addressList = parseSingleAddress(tokens, false);
 152  11 if (addressList.isEmpty()) {
 153  0 throw new AddressException("Null address", addresses, 0);
 154    }
 155   
 156    // this could be a simple list of blank delimited tokens. Ensure we only got one back.
 157  11 if (addressList.size() > 1) {
 158  0 throw new AddressException("Illegal Address", addresses, 0);
 159    }
 160   
 161  11 InternetAddress address = (InternetAddress)addressList.get(0);
 162   
 163    // validation occurs on an address that's already been split into personal and address
 164    // data.
 165  11 if (address.personal != null) {
 166  0 throw new AddressException("Illegal Address", addresses, 0);
 167    }
 168    // This token must be a stream terminator, or we have an error.
 169  11 AddressToken token = tokens.nextToken();
 170  11 if (token.type != END_OF_TOKENS) {
 171  0 illegalAddress("Illegal Address", token);
 172    }
 173    }
 174   
 175   
 176    /**
 177    * Extract the set of addresses from an Internet group specification.
 178    *
 179    * @return An array containing all of the non-null addresses in the list.
 180    * @exception AddressException
 181    */
 182  14 public InternetAddress[] extractGroupList() throws AddressException
 183    {
 184    // get the address as a set of tokens we can process.
 185  14 TokenStream tokens = tokenizeAddress();
 186   
 187    // get an array list accumulator.
 188  14 ArrayList addresses = new ArrayList();
 189   
 190  14 AddressToken token = tokens.nextToken();
 191   
 192    // scan forward to the ':' that starts the group list. If we don't find one,
 193    // this is an exception.
 194  14 while (token.type != COLON) {
 195  14 if (token.type == END_OF_TOKENS) {
 196  0 illegalAddress("Missing ':'", token);
 197    }
 198  14 token = tokens.nextToken();
 199    }
 200   
 201    // we process sections of the token stream until we run out of tokens.
 202  14 while (true) {
 203    // parse off a single address. Address lists can have null elements,
 204    // so this might return a null value. The null value does not get added
 205    // to the address accumulator.
 206  26 addresses.addAll(parseSingleAddress(tokens, true));
 207    // This token should be either a "," delimiter or a group terminator. If we're
 208    // at the end, this is an error.
 209  26 token = tokens.nextToken();
 210  26 if (token.type == SEMICOLON) {
 211  14 break;
 212    }
 213  12 else if (token.type == END_OF_TOKENS) {
 214  0 illegalAddress("Missing ';'", token);
 215    }
 216    }
 217   
 218  14 return (InternetAddress [])addresses.toArray(new InternetAddress[0]);
 219    }
 220   
 221   
 222    /**
 223    * Parse out a single address from a stream
 224    * of address tokens, returning an InternetAddress object that
 225    * represents the address.
 226    *
 227    * @param tokens The token source for this address.
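 228    * @param inGroup True if the address is being parsed as a member of a group list.
 229    * @return A list of the parsed out and constructed InternetAddress objects for
 230    * the next address. Returns an empty list if this is an "empty"
 231    * address in a list.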
 232    * @exception AddressException
 233    */
 234  349 private List parseSingleAddress(TokenStream tokens, boolean inGroup) throws AddressException
 235    {
 236  349 List parsedAddresses = new ArrayList();
 237   
 238    // index markers for personal information
 239  349 AddressToken personalStart = null;
 240  349 AddressToken personalEnd = null;
 241   
 242    // and similar bits for the address information.
 243  349 AddressToken addressStart = null;
 244  349 AddressToken addressEnd = null;
 245   
 246    // there is a fall-back set of rules allowed that will parse the address as a set of blank delimited
 247    // tokens. However, we do NOT allow this if we encounter any tokens that fall outside of these
 248    // rules. For example, comment fields and quoted strings will disallow the very lenient rule set.
 249  349 boolean nonStrictRules = true;
 250   
 251    // we don't know the type of address yet
 252  349 int addressType = UNKNOWN;
 253   
 254    // the parsing goes in two stages. Stage one runs through the tokens locating the bounds
 255    // of the address we're working on, resolving the personal information, and also validating
 256    // some of the larger scale syntax features of an address (matched delimiters for routes and
 257    // groups, invalid nesting checks, etc.).
 258   
 259    // get the next token from the queue and save this. We're going to scan ahead a bit to
 260    // figure out what type of address we're looking at, then reset to do the actual parsing
 261    // once we've figured out a form.
 262  349 AddressToken first = tokens.nextToken();
 263    // push it back on before starting processing.
 264  349 tokens.pushToken(first);
 265   
 266    // scan ahead for a trigger token that tells us what we've got.
 267  349 while (addressType == UNKNOWN) {
 268   
 269  1458 AddressToken token = tokens.nextToken();
 270  1458 switch (token.type) {
 271    // skip these for now...after we've processed everything and found that this is a simple
 272    // address form, then we'll check for a leading comment token in the first position and use
 273    // it as personal information.
 274  53 case COMMENT:
 275    // comments do, however, denote that this must be parsed according to RFC822 rules.
 276  53 nonStrictRules = false;
 277  53 break;
 278   
 279    // a semi-colon when processing a group is an address terminator. we need to
 280    // process this like a comma then
 281  14 case SEMICOLON:
 282  14 if (inGroup) {
 283    // we need to push the terminator back on for the caller to see.
 284  14 tokens.pushToken(token);
 285    // we've not tagged any tokens as being the address beginning, so this must be a
 286    // null address.
 287  14 if (addressStart == null) {
 288    // just return the empty list from this.
 289  2 return parsedAddresses;
 290    }
 291    // the end token is the back part.
 292  12 addressEnd = tokens.previousToken(token);
 293    // without a '<' for a route addr, we can't distinguish address tokens from personal data.
 294    // We'll use a leading comment, if there is one.
 295  12 personalStart = null;
 296    // this is just a simple form.
 297  12 addressType = SIMPLE_ADDR;
 298  12 break;
 299    }
 300   
 301    // NOTE: The above falls through if this is not a group.
 302   
 303    // any of these tokens are a real token that can be the start of an address. Many of
 304    // them are not valid as first tokens in this context, but we flag them later if validation
 305    // has been requested. For now, we just mark these as the potential address start.
 306  25 case DOMAIN_LITERAL:
 307  42 case QUOTED_LITERAL:
 308    // this set of tokens require fuller RFC822 parsing, so turn off the flag.
 309  67 nonStrictRules = false;
 310   
 311  615 case ATOM:
 312  169 case AT_SIGN:
 313  205 case PERIOD:
 314    // if we've not determined the start of the address yet, then check to see if we
 315    // need to consider this the personal start.
 316  1056 if (addressStart == null) {
 317  275 if (personalStart == null) {
 318  275 personalStart = token;
 319    }
 320    // This is the first real token of the address, which at this point can
 321    // be either the personal info or the first token of the address. If we hit
 322    // an address terminator without encountering either a route trigger or group
 323    // trigger, then this is the real address.
 324  275 addressStart = token;
 325    }
 326  1056 break;
 327   
 328    // a LEFT_ANGLE indicates we have a full RFC822 mailbox form. The leading phrase
 329    // is the personal info. The address is inside the brackets.
 330  102 case LEFT_ANGLE:
 331    // a route address automatically switches off the blank-delimited token mode.
 332  102 nonStrictRules = false;
 333    // this is a route address
 334  102 addressType = ROUTE_ADDR;
 335    // the address is placed in the InternetAddress object without the route
 336    // brackets, so our start is one past this.
 337  102 addressStart = tokens.nextRealToken();
 338    // push this back on the queue so the scanner picks it up properly.
 339  102 tokens.pushToken(addressStart);
 340    // make sure we flag the end of the personal section too.
 341  102 if (personalStart != null) {
 342  45 personalEnd = tokens.previousToken(token);
 343    }
 344    // scan the rest of a route address.
 345  102 addressEnd = scanRouteAddress(tokens, false);
 346  100 break;
 347   
 348    // a COLON indicates this is a group specifier...parse the group.
 349  35 case COLON:
 350    // Colons would not be valid in simple lists, so turn it off.
 351  35 nonStrictRules = false;
 352    // if we're scanning a group, we shouldn't encounter a ":". This is a
 353    // recursion error if found.
 354  35 if (inGroup) {
 355  0 illegalAddress("Nested group element", token);
 356    }
 357  35 addressType = GROUP_ADDR;
 358    // groups don't have any personal sections.
 359  35 personalStart = null;
 360    // our real start was back at the beginning
 361  35 addressStart = first;
 362  35 addressEnd = scanGroupAddress(tokens);
 363  32 break;
 364   
 365    // a semicolon is treated the same as a comma if we're processing a group.
 366   
 367   
 368    // reached the end of string...this might be a null address, or one of the very simple name
 369    // forms used for non-strict RFC822 versions. Reset, and try that form
 370  177 case END_OF_TOKENS:
 371    // if we're scanning a group, we shouldn't encounter an end token. This is an
 372    // error if found.
 373  177 if (inGroup) {
 374  0 illegalAddress("Missing ';'", token);
 375    }
 376   
 377    // NOTE: fall through from above.
 378   
 379    // this is either a terminator for an address list or a group terminator.
 380  21 case COMMA:
 381    // we need to push the terminator back on for the caller to see.
 382  198 tokens.pushToken(token);
 383    // we've not tagged any tokens as being the address beginning, so this must be a
 384    // null address.
 385  198 if (addressStart == null) {
 386    // just return the empty list from this.
 387  14 return parsedAddresses;
 388    }
 389    // the end token is the back part.
 390  184 addressEnd = tokens.previousToken(token);
 391    // without a '<' for a route addr, we can't distinguish address tokens from personal data.
 392    // We'll use a leading comment, if there is one.
 393  184 personalStart = null;
 394    // this is just a simple form.
 395  184 addressType = SIMPLE_ADDR;
 396  184 break;
 397   
 398    // right angle tokens are pushed, because parsing of the bracketing is not necessarily simple.
 399    // we need to flag these here.
 400  0 case RIGHT_ANGLE:
 401  0 illegalAddress("Unexpected '>'", token);
 402   
 403    }
 404    }
 405   
 406  328 String personal = null;
 407   
 408    // if we have personal data, then convert it to a string value.
 409  328 if (personalStart != null) {
 410  44 TokenStream personalTokens = tokens.section(personalStart, personalEnd);
 411  44 personal = personalToString(personalTokens);
 412    }
 413    // if we have a simple address, then check the first token to see if it's a comment. For simple addresses,
 414    // we'll accept the first comment token as the personal information.
 415    else {
 416  284 if (addressType == SIMPLE_ADDR && first.type == COMMENT) {
 417  19 personal = first.value;
 418    }
 419    }
 420   
 421  328 TokenStream addressTokens = tokens.section(addressStart, addressEnd);
 422   
 423    // if this is one of the strictly RFC822 types, then we always validate the address. If this is a
 424    // simple address, then we only validate if strict parsing rules are in effect or we've been asked
 425    // to validate.
 426  328 if (validationLevel != PARSE_HEADER) {
 427  308 switch (addressType) {
 428  31 case GROUP_ADDR:
 429  31 validateGroup(addressTokens);
 430  30 break;
 431   
 432  91 case ROUTE_ADDR:
 433  91 validateRouteAddr(addressTokens, false);
 434  90 break;
 435   
 436  186 case SIMPLE_ADDR:
 437    // this is a conditional validation
 438  186 validateSimpleAddress(addressTokens);
 439  177 break;
 440    }
 441    }
 442   
 443    // more complex addresses and addresses containing tokens other than just simple addresses
 444    // need proper handling.
 445  317 if (validationLevel != NONSTRICT || addressType != SIMPLE_ADDR || !nonStrictRules) {
 446    // we might have traversed this already when we validated, so reset the
 447    // position before using this again.
 448  302 addressTokens.reset();
 449  302 String address = addressToString(addressTokens);
 450   
 451    // get the parsed out sections as string values.
 452  302 InternetAddress result = new InternetAddress();
 453  302 result.setAddress(address);
 454  302 try {
 455  302 result.setPersonal(personal);
 456    } catch (UnsupportedEncodingException e) {
 457    }
 458    // even though we have a single address, we return this as a list. Simple addresses
 459    // can produce a list of items, so we need to return everything.
 460  302 parsedAddresses.add(result);
 461  302 return parsedAddresses;
 462    }
 463    else {
 464  15 addressTokens.reset();
 465   
 466  15 TokenStream nextAddress = addressTokens.getBlankDelimitedToken();
 467  15 while (nextAddress != null) {
 468  15 String address = addressToString(nextAddress);
 469    // get the parsed out sections as string values.
 470  15 InternetAddress result = new InternetAddress();
 471  15 result.setAddress(address);
 472  15 parsedAddresses.add(result);
 473  15 nextAddress = addressTokens.getBlankDelimitedToken();
 474    }
 475  15 return parsedAddresses;
 476    }
 477    }
 478   
 479   
 480    /**
 481    * Scan the token stream, parsing off a route addr spec. This
 482    * will do some basic syntax validation, but will not actually
 483    * validate any of the address information. Comments will be
 484    * discarded.
 485    *
 486    * @param tokens The stream of tokens.
 487    *
 488    * @return The last token of the route address (the one preceding the
 489    * terminating '>').
 490    */
 491  114 private AddressToken scanRouteAddress(TokenStream tokens, boolean inGroup) throws AddressException {
 492    // get the first token and ensure we have something between the "<" and ">".
 493  114 AddressToken token = tokens.nextRealToken();
 494    // the last processed non-whitespace token, which is the actual address end once the
 495    // right angle bracket is encountered.
 496   
 497  114 AddressToken previous = null;
 498   
 499    // if this route-addr has route information, the first token after the '<' must be a '@'.
 500    // this determines if/where a colon or comma can appear.
 501  114 boolean inRoute = token.type == AT_SIGN;
 502   
 503    // now scan until we reach the terminator. The only validation is done on illegal characters.
 504  114 while (true) {
 505  927 switch (token.type) {
 506    // The following tokens are all valid between the brackets, so just skip over them.
 507  437 case ATOM:
 508  4 case QUOTED_LITERAL:
 509  7 case DOMAIN_LITERAL:
 510  191 case PERIOD:
 511  143 case AT_SIGN:
 512  782 break;
 513   
 514  22 case COLON:
 515    // if not processing route information, this is illegal.
 516  22 if (!inRoute) {
 517  0 illegalAddress("Unexpected ':'", token);
 518    }
 519    // this is the end of the route information, the rules now change.
 520  22 inRoute = false;
 521  22 break;
 522   
 523  10 case COMMA:
 524    // if not processing route information, this is illegal.
 525  10 if (!inRoute) {
 526  1 illegalAddress("Unexpected ','", token);
 527    }
 528  9 break;
 529   
 530  111 case RIGHT_ANGLE:
 531    // if previous is null, we've had a route address which is "<>". That's illegal.
 532  111 if (previous == null) {
 533  0 illegalAddress("Illegal address", token);
 534    }
 535    // step to the next token..this had better be either a comma for another address or
 536    // the very end of the address list.
 537  111 token = tokens.nextRealToken();
 538    // if we're scanning part of a group, then the allowed terminators are either ',' or ';'.
 539  111 if (inGroup) {
 540  11 if (token.type != COMMA && token.type != SEMICOLON) {
 541  0 illegalAddress("Illegal address", token);
 542    }
 543    }
 544    // a normal address should have either a ',' for a list or the end.
 545    else {
 546  100 if (token.type != COMMA && token.type != END_OF_TOKENS) {
 547  0 illegalAddress("Illegal address", token);
 548    }
 549    }
 550    // we need to push the termination token back on.
 551  111 tokens.pushToken(token);
 552    // return the previous token as the updated position.
 553  111 return previous;
 554   
 555  1 case END_OF_TOKENS:
 556  1 illegalAddress("Missing '>'", token);
 557   
 558    // now for the illegal ones in this context.
 559  0 case SEMICOLON:
 560  0 illegalAddress("Unexpected ';'", token);
 561   
 562  1 case LEFT_ANGLE:
 563  1 illegalAddress("Unexpected '<'", token);
 564    }
 565    // remember the previous token.
 566  813 previous = token;
 567  813 token = tokens.nextRealToken();
 568    }
 569    }
 570   
 571   
 572    /**
 573    * Scan the token stream, parsing off a group address. This
 574    * will do some basic syntax validation, but will not actually
 575    * validate any of the address information. Comments will be
 576    * ignored.
 577    *
 578    * @param tokens The stream of tokens.
 579    *
 580    * @return The last token of the group address (the terminating ';').
 581    */
 582  35 private AddressToken scanGroupAddress(TokenStream tokens) throws AddressException {
 583    // A group does not require that there be anything between the ':' and ';'. This is
 584    // just a group with an empty list.
 585  35 AddressToken token = tokens.nextRealToken();
 586   
 587    // now scan until we reach the terminator. The only validation is done on illegal characters.
 588  35 while (true) {
 589  199 switch (token.type) {
 590    // The following tokens are all valid in group addresses, so just skip over them.
 591  83 case ATOM:
 592  0 case QUOTED_LITERAL:
 593  0 case DOMAIN_LITERAL:
 594  25 case PERIOD:
 595  25 case AT_SIGN:
 596  20 case COMMA:
 597  153 break;
 598   
 599  1 case COLON:
 600  1 illegalAddress("Nested group", token);
 601   
 602    // route address within a group specifier....we need to at least verify the bracket nesting
 603    // and higher level syntax of the route.
 604  12 case LEFT_ANGLE:
 605  12 scanRouteAddress(tokens, true);
 606  11 break;
 607   
 608    // the only allowed terminator is the ';'
 609  1 case END_OF_TOKENS:
 610  1 illegalAddress("Missing ';'", token);
 611   
 612    // the ';' is the group terminator.
 613  32 case SEMICOLON:
 614    // verify there's nothing illegal after this.
 615  32 AddressToken next = tokens.nextRealToken();
 616  32 if (next.type != COMMA && next.type != END_OF_TOKENS) {
 617  0 illegalAddress("Illegal address", token);
 618    }
 619    // don't forget to put this back on...our caller will need it.
 620  32 tokens.pushToken(next);
 621  32 return token;
 622   
 623  0 case RIGHT_ANGLE:
 624  0 illegalAddress("Unexpected '>'", token);
 625    }
 626  164 token = tokens.nextRealToken();
 627    }
 628    }
 629   
 630   
 631    /**
 632    * Parse the provided internet address into a set of tokens. This
 633    * phase only does a syntax check on the tokens. The interpretation
 634    * of the tokens is the next phase.
 635    *
 636    * @exception AddressException
 637    */
 638  326 private TokenStream tokenizeAddress() throws AddressException {
 639   
 640    // get a list for the set of tokens
 641  326 TokenStream tokens = new TokenStream();
 642   
 643  326 end = addresses.length(); // our parsing end marker
 644   
 645    // now scan along the string looking for the special characters in an internet address.
 646  326 while (moreCharacters()) {
 647  2624 char ch = currentChar();
 648   
 649  2624 switch (ch) {
 650    // start of a comment bit...ignore everything until we hit a closing paren.
 651  55 case '(':
 652  55 scanComment(tokens);
 653  53 break;
 654    // a closing paren found outside of normal processing.
 655  0 case ')':
 656  0 syntaxError("Unexpected ')'", position);
 657   
 658   
 659    // start of a quoted string
 660  48 case '"':
 661  48 scanQuotedLiteral(tokens);
 662  46 break;
 663    // domain literal
 664  37 case '[':
 665  37 scanDomainLiteral(tokens);
 666  32 break;
 667   
 668    // a naked closing bracket...not valid except as part of a domain literal.
 669  0 case ']':
 670  0 syntaxError("Unexpected ']'", position);
 671   
 672    // special character delimiters
 673  119 case '<':
 674  119 tokens.addToken(new AddressToken(LEFT_ANGLE, position));
 675  119 nextChar();
 676  119 break;
 677   
 678    // a naked closing angle bracket...not valid without a starting one, but
 679    // we need to handle this in context.
 680  112 case '>':
 681  112 tokens.addToken(new AddressToken(RIGHT_ANGLE, position));
 682  112 nextChar();
 683  112 break;
 684  72 case ':':
 685  72 tokens.addToken(new AddressToken(COLON, position));
 686  72 nextChar();
 687  72 break;
 688  62 case ',':
 689  62 tokens.addToken(new AddressToken(COMMA, position));
 690  62 nextChar();
 691  62 break;
 692  424 case '.':
 693  424 tokens.addToken(new AddressToken(PERIOD, position));
 694  424 nextChar();
 695  424 break;
 696  49 case ';':
 697  49 tokens.addToken(new AddressToken(SEMICOLON, position));
 698  49 nextChar();
 699  49 break;
 700  345 case '@':
 701  345 tokens.addToken(new AddressToken(AT_SIGN, position));
 702  345 nextChar();
 703  345 break;
 704   
 705    // white space characters. These are mostly token delimiters, but there are some relaxed
 706    // situations where they get processed, so we need to add a white space token for the first
 707    // one we encounter in a span.
 708  138 case ' ':
 709  0 case '\t':
 710  0 case '\r':
 711  0 case '\n':
 712    // add a single white space token
 713  138 tokens.addToken(new AddressToken(WHITESPACE, position));
 714   
 715  138 nextChar();
 716    // step over any space characters, leaving us positioned either at the end
 717    // or the first non-whitespace character.
 718  138 while (moreCharacters()) {
 719  138 char nextChar = currentChar();
 720  138 if (nextChar == ' ' || nextChar == '\t' || nextChar == '\r' || nextChar == '\n') {
 721  0 nextChar();
 722    }
 723    else {
 724  138 break;
 725    }
 726    }
 727  138 break;
 728   
 729    // potentially an atom...if it starts with an allowed atom character, we
 730    // parse out the token, otherwise this is invalid.
 731  1163 default:
 732  1163 if (ch < 040 || ch >= 0177) {
 733  0 syntaxError("Illegal character in address", position);
 734    }
 735   
 736  1163 scanAtom(tokens);
 737  1163 break;
 738    }
 739    }
 740   
 741    // for this end marker, give an end position.
 742  317 tokens.addToken(new AddressToken(END_OF_TOKENS, addresses.length()));
 743  317 return tokens;
 744    }
 745   
 746   
 747    /**
 748    * Step to the next character position while parsing.
 749    */
 750  7401 private void nextChar() {
 751  7401 position++;
 752    }
 753   
 754   
 755    /**
 756    * Retrieve the character at the current parsing position.
 757    *
 758    * @return The current character.
 759    */
 760  8556 private char currentChar() {
 761  8556 return addresses.charAt(position);
 762    }
 763   
 764    /**
 765    * Test if there are more characters left to parse.
 766    *
 767    * @return True if there are more characters left to parse, false if we've reached the end.
 768    */
 769  9028 private boolean moreCharacters() {
 770  9028 return position < end;
 771    }
 772   
 773   
 774    /**
 775    * Parse a quoted string as specified by the RFC822 specification.
 776    *
 777    * @param tokens The TokenStream where the parsed out token is added.
 778    */
 779  48 private void scanQuotedLiteral(TokenStream tokens) throws AddressException {
 780  48 StringBuffer value = new StringBuffer();
 781   
 782    // save the start position for the token.
 783  48 int startPosition = position;
 784    // step over the quote delimiter.
 785  48 nextChar();
 786   
 787  48 while (moreCharacters()) {
 788  292 char ch = currentChar();
 789   
 790    // is this an escape char?
 791  292 if (ch == '\\') {
 792    // step past this, and grab the following character
 793  13 nextChar();
 794  13 if (!moreCharacters()) {
 795  0 syntaxError("Missing '\"'", position);
 796    }
 797  13 value.append(currentChar());
 798    }
 799    // end of the string?
 800  279 else if (ch == '"') {
 801    // return the constructed string.
 802  46 tokens.addToken(new AddressToken(value.toString(), QUOTED_LITERAL, position));
 803    // step over the close delimiter for the benefit of the next token.
 804  46 nextChar();
 805  46 return;
 806    }
 807    // the RFC822 spec disallows CR characters.
 808  233 else if (ch == '\r') {
 809  1 syntaxError("Illegal line end in literal", position);
 810    }
 811    else
 812    {
 813  232 value.append(ch);
 814    }
 815  245 nextChar();
 816    }
 817    // missing delimiter
 818  1 syntaxError("Missing '\"'", position);
 819    }
 820   
 821   
 822    /**
 823    * Parse a domain literal as specified by the RFC822 specification.
 824    *
 825    * @param tokens The TokenStream where the parsed out token is added.
 826    */
 827  37 private void scanDomainLiteral(TokenStream tokens) throws AddressException {
 828  37 StringBuffer value = new StringBuffer();
 829   
 830  37 int startPosition = position;
 831    // step over the opening '[' delimiter.
 832  37 nextChar();
 833   
 834  37 while (moreCharacters()) {
 835  223 char ch = currentChar();
 836   
 837    // is this an escape char?
 838  223 if (ch == '\\') {
 839    // because domain literals don't get extra escaping, we render them
 840    // with the escaped characters intact. Therefore, append the '\' escape
 841    // first, then append the escaped character without examination.
 842  3 value.append(currentChar());
 843    // step past this, and grab the following character
 844  3 nextChar();
 845  3 if (!moreCharacters()) {
 846  0 syntaxError("Missing '\"'", position);
 847    }
 848  3 value.append(currentChar());
 849    }
 850    // end of the string?
 851  220 else if (ch == ']') {
 852    // return the constructed string.
 853  32 tokens.addToken(new AddressToken(value.toString(), DOMAIN_LITERAL, startPosition));
 854    // step over the close delimiter for the benefit of the next token.
 855  32 nextChar();
 856  32 return;
 857    }
 858    // the RFC822 spec says no nesting
 859  188 else if (ch == '[') {
 860  1 syntaxError("Unexpected '['", position);
 861    }
 862    // carriage returns are similarly illegal.
 863  187 else if (ch == '\r') {
 864  1 syntaxError("Illegal line end in domain literal", position);
 865    }
 866    else
 867    {
 868  186 value.append(ch);
 869    }
 870  189 nextChar();
 871    }
 872    // missing delimiter
 873  3 syntaxError("Missing ']'", position);
 874    }
 875   
 876    /**
 877    * Scan an atom in an internet address, using the RFC822 rules
 878    * for atom delimiters.
 879    *
 880    * @param tokens The TokenStream where the parsed out token is added.
 881    */
 882  1163 private void scanAtom(TokenStream tokens) throws AddressException {
 883  1163 int start = position;
 884  1163 nextChar();
 885  1163 while (moreCharacters()) {
 886   
 887  4943 char ch = currentChar();
 888  4943 if (isAtom(ch)) {
 889  3933 nextChar();
 890    }
 891    else {
 892  1010 break;
 893    }
 894    }
 895   
 896    // return the scanned part of the string.
 897  1163 tokens.addToken(new AddressToken(addresses.substring(start, position), ATOM, start));
 898    }
 899   
 900   
 901    /**
 902    * Parse an internet address comment field as specified by
 903    * RFC822. Includes support for quoted characters and nesting.
 904    *
 905    * @param tokens The TokenStream where the parsed out token is added.
 906    */
 907  55 private void scanComment(TokenStream tokens) throws AddressException {
 908  55 StringBuffer value = new StringBuffer();
 909   
 910  55 int startPosition = position;
 911    // step past the start character
 912  55 nextChar();
 913   
 914    // we're at the top nesting level on the comment.
 915  55 int nest = 1;
 916   
 917    // scan while we have more characters.
 918  55 while (moreCharacters()) {
 919  315 char ch = currentChar();
 920    // escape character?
 921  315 if (ch == '\\') {
 922    // step over this...if escaped, we must have at least one more character
 923    // in the string.
 924  2 nextChar();
 925  2 if (!moreCharacters()) {
 926  0 syntaxError("Missing ')'", position);
 927    }
 928  2 value.append(currentChar());
 929    }
 930    // nested comment?
 931  313 else if (ch == '(') {
 932    // step the nesting level...we treat the comment as a single unit, with the delimiters
 933    // for the nested comments embedded in the middle
 934  1 nest++;
 935  1 value.append(ch);
 936    }
 937    // is this the comment close?
 938  312 else if (ch == ')') {
 939    // reduce the nesting level. If we still have more to process, add the delimiter character
 940    // and keep going.
 941  54 nest--;
 942  54 if (nest > 0) {
 943  1 value.append(ch);
 944    }
 945    else {
 946    // step past this and return. The outermost comment delimiter is not included in
 947    // the string value, since this is frequently used as personal data on the
 948    // InternetAddress objects.
 949  53 nextChar();
 950  53 tokens.addToken(new AddressToken(value.toString(), COMMENT, startPosition));
 951  53 return;
 952    }
 953    }
 954  258 else if (ch == '\r') {
 955  1 syntaxError("Illegal line end in comment", position);
 956    }
 957    else {
 958  257 value.append(ch);
 959    }
 960    // step to the next character.
 961  261 nextChar();
 962    }
 963    // ran out of data before seeing the closing bit, not good
 964  1 syntaxError("Missing ')'", position);
 965    }
 966   
 967   
 968    /**
 969    * Validate the syntax of an RFC822 group internet address specification.
 970    *
 971    * @param tokens The stream of tokens for the address.
 972    *
 973    * @exception AddressException
 974    */
 975  31 private void validateGroup(TokenStream tokens) throws AddressException {
 976    // we know already this is an address in the form "phrase:group;". Now we need to validate the
 977    // elements.
 978   
 979  31 int phraseCount = 0;
 980   
 981  31 AddressToken token = tokens.nextRealToken();
 982    // now scan to the semi color, ensuring we have only word or comment tokens.
 983  31 while (token.type != COLON) {
 984    // only these tokens are allowed here.
 985  40 if (token.type != ATOM && token.type != QUOTED_LITERAL) {
 986  0 invalidToken(token);
 987    }
 988  40 phraseCount++;
 989  40 token = tokens.nextRealToken();
 990    }
 991   
 992   
 993    // RFC822 groups require a leading phrase in group specifiers.
 994  31 if (phraseCount == 0) {
 995  1 illegalAddress("Missing group identifier phrase", token);
 996    }
 997   
 998    // now we do the remainder of the parsing using the initial phrase list as the sink...the entire
 999    // address will be converted to a string later.
 1000   
 1001    // ok, we only know this has been valid up to the ":", now we have some real checks to perform.
 1002  30 while (true) {
 1003    // go scan off a mailbox. if everything goes according to plan, we should be positioned at either
 1004    // a comma or a semicolon.
 1005  48 validateGroupMailbox(tokens);
 1006   
 1007  48 token = tokens.nextRealToken();
 1008   
 1009    // we're at the end of the group. Make sure this is truely the end.
 1010  48 if (token.type == SEMICOLON) {
 1011  30 token = tokens.nextRealToken();
 1012  30 if (token.type != END_OF_TOKENS) {
 1013  0 illegalAddress("Illegal group address", token);
 1014    }
 1015  30 return;
 1016    }
 1017   
 1018    // if not a semicolon, this better be a comma.
 1019  18 else if (token.type != COMMA) {
 1020  0 illegalAddress("Illegal group address", token);
 1021    }
 1022    }
 1023    }
 1024   
 1025   
 1026    /**
 1027    * Validate the syntax of single mailbox within a group address.
 1028    *
 1029    * @param tokens The stream of tokens representing the address.
 1030    *
 1031    * @exception AddressException
 1032    */
 1033  48 private void validateGroupMailbox(TokenStream tokens) throws AddressException {
 1034  48 AddressToken first = tokens.nextRealToken();
 1035    // is this just a null address in the list? then push the terminator back and return.
 1036  48 if (first.type == COMMA || first.type == SEMICOLON) {
 1037  14 tokens.pushToken(first);
 1038  14 return;
 1039    }
 1040   
 1041    // now we need to scan ahead to see if we can determine the type.
 1042  34 AddressToken token = first;
 1043   
 1044   
 1045    // we need to scan forward to figure out what sort of address this is.
 1046  62 while (first != null) {
 1047  62 switch (token.type) {
 1048    // until we know the context, these are all just ignored.
 1049  0 case QUOTED_LITERAL:
 1050  28 case ATOM:
 1051  28 break;
 1052   
 1053    // a LEFT_ANGLE indicates we have a full RFC822 mailbox form. The leading phrase
 1054    // is the personal info. The address is inside the brackets.
 1055  11 case LEFT_ANGLE:
 1056  11 tokens.pushToken(first);
 1057  11 validatePhrase(tokens, false);
 1058  11 validateRouteAddr(tokens, true);
 1059  11 return;
 1060   
 1061    // we've hit a period as the first non-word token. This should be part of a local-part
 1062    // of an address.
 1063  0 case PERIOD:
 1064    // we've hit an "@" as the first non-word token. This is probably a simple address in
 1065    // the form "user@domain".
 1066  21 case AT_SIGN:
 1067  21 tokens.pushToken(first);
 1068  21 validateAddressSpec(tokens);
 1069  21 return;
 1070   
 1071    // reached the end of string...this might be a null address, or one of the very simple name
 1072    // forms used for non-strict RFC822 versions. Reset, and try that form
 1073  1 case COMMA:
 1074    // this is the end of the group...handle it like a comma for now.
 1075  1 case SEMICOLON:
 1076  2 tokens.pushToken(first);
 1077  2 validateAddressSpec(tokens);
 1078  2 return;
 1079   
 1080  0 case END_OF_TOKENS:
 1081  0 illegalAddress("Missing ';'", token);
 1082   
 1083    }
 1084  28 token = tokens.nextRealToken();
 1085    }
 1086    }
 1087   
 1088   
 1089    /**
 1090    * Utility method for throwing an AddressException caused by an
 1091    * unexpected primitive token.
 1092    *
 1093    * @param token The token causing the problem (must not be a value type token).
 1094    *
 1095    * @exception AddressException
 1096    */
 1097  0 private void invalidToken(AddressToken token) throws AddressException {
 1098  0 illegalAddress("Unexpected '" + token.type + "'", token);
 1099    }
 1100   
 1101   
 1102    /**
 1103    * Raise an error about illegal syntax.
 1104    *
 1105    * @param message The message used in the thrown exception.
 1106    * @param position The parsing position within the string.
 1107    *
 1108    * @exception AddressException
 1109    */
 1110  9 private void syntaxError(String message, int position) throws AddressException
 1111    {
 1112  9 throw new AddressException(message, addresses, position);
 1113    }
 1114   
 1115   
 1116    /**
 1117    * Throw an exception based on the position of an invalid token.
 1118    *
 1119    * @param message The exception message.
 1120    * @param token The token causing the error. This tokens position is used
 1121    * in the exception information.
 1122    */
 1123  16 private void illegalAddress(String message, AddressToken token) throws AddressException {
 1124  16 throw new AddressException(message, addresses, token.position);
 1125    }
 1126   
 1127   
 1128    /**
 1129    * Validate that a required phrase exists.
 1130    *
 1131    * @param tokens The set of tokens to validate. positioned at the phrase start.
 1132    * @param required A flag indicating whether the phrase is optional or required.
 1133    *
 1134    * @exception AddressException
 1135    */
 1136  11 private void validatePhrase(TokenStream tokens, boolean required) throws AddressException {
 1137    // we need to have at least one WORD token in the phrase...everything is optional
 1138    // after that.
 1139  11 AddressToken token = tokens.nextRealToken();
 1140  11 if (token.type != ATOM && token.type != QUOTED_LITERAL) {
 1141  6 if (required) {
 1142  0 illegalAddress("Missing group phrase", token);
 1143    }
 1144    }
 1145   
 1146    // now scan forward to the end of the phrase
 1147  11 token = tokens.nextRealToken();
 1148  11 while (token.type == ATOM || token.type == QUOTED_LITERAL) {
 1149  6 token = tokens.nextRealToken();
 1150    }
 1151    }
 1152   
 1153   
 1154    /**
 1155    * validate a routeaddr specification
 1156    *
 1157    * @param tokens The tokens representing the address portion (personal information
 1158    * already removed).
 1159    * @param ingroup true indicates we're validating a route address inside a
 1160    * group list. false indicates we're validating a standalone
 1161    * address.
 1162    *
 1163    * @exception AddressException
 1164    */
 1165  102 private void validateRouteAddr(TokenStream tokens, boolean ingroup) throws AddressException {
 1166    // get the next real token.
 1167  102 AddressToken token = tokens.nextRealToken();
 1168    // if this is an at sign, then we have a list of domains to parse.
 1169  102 if (token.type == AT_SIGN) {
 1170    // push the marker token back in for the route parser, and step past that part.
 1171  20 tokens.pushToken(token);
 1172  20 validateRoute(tokens);
 1173    }
 1174    else {
 1175    // we need to push this back on to validate the local part.
 1176  82 tokens.pushToken(token);
 1177    }
 1178   
 1179    // now we expect to see an address spec.
 1180  102 validateAddressSpec(tokens);
 1181   
 1182  101 token = tokens.nextRealToken();
 1183  101 if (ingroup) {
 1184    // if we're validating within a group specification, the angle brackets are still there (and
 1185    // required).
 1186  11 if (token.type != RIGHT_ANGLE) {
 1187  0 illegalAddress("Missing '>'", token);
 1188    }
 1189    }
 1190    else {
 1191    // the angle brackets were removed to make this an address, so we should be done. Make sure we
 1192    // have a terminator here.
 1193  90 if (token.type != END_OF_TOKENS) {
 1194  0 illegalAddress("Illegal Address", token);
 1195    }
 1196    }
 1197    }
 1198   
 1199   
 1200   
 1201    /**
 1202    * Validate a simple address in the form "user@domain".
 1203    *
 1204    * @param tokens The stream of tokens representing the address.
 1205    */
 1206  186 private void validateSimpleAddress(TokenStream tokens) throws AddressException {
 1207   
 1208    // the validation routines occur after addresses have been split into
 1209    // personal and address forms. Therefore, our validation begins directly
 1210    // with the first token.
 1211  186 validateAddressSpec(tokens);
 1212   
 1213    // get the next token and see if there is something here...anything but the terminator is an error
 1214  183 AddressToken token = tokens.nextRealToken();
 1215  183 if (token.type != END_OF_TOKENS) {
 1216  6 illegalAddress("Illegal Address", token);
 1217    }
 1218    }
 1219   
 1220    /**
 1221    * Validate the addr-spec portion of an address. RFC822 requires
 1222    * this be of the form "local-part@domain". However, javamail also
 1223    * allows simple address of the form "local-part". We only require
 1224    * the domain if an '@' is encountered.
 1225    *
 1226    * @param tokens
 1227    */
 1228  311 private void validateAddressSpec(TokenStream tokens) throws AddressException {
 1229    // all addresses, even the simple ones, must have at least a local part.
 1230  311 validateLocalPart(tokens);
 1231   
 1232    // now see if we have a domain portion to look at.
 1233  307 AddressToken token = tokens.nextRealToken();
 1234  307 if (token.type == AT_SIGN) {
 1235  268 validateDomain(tokens);
 1236    }
 1237    else {
 1238    // put this back for termination
 1239  39 tokens.pushToken(token);
 1240    }
 1241   
 1242    }
 1243   
 1244   
 1245    /**
 1246    * Validate the route portion of a route-addr. This is a list
 1247    * of domain values in the form 1#("@" domain) ":".
 1248    *
 1249    * @param tokens The token stream holding the address information.
 1250    */
 1251  20 private void validateRoute(TokenStream tokens) throws AddressException {
 1252  20 while (true) {
 1253  56 AddressToken token = tokens.nextRealToken();
 1254    // if this is the first part of the list, go parse off a domain
 1255  56 if (token.type == AT_SIGN) {
 1256  28 validateDomain(tokens);
 1257    }
 1258    // another element in the list? Go around again
 1259  28 else if (token.type == COMMA) {
 1260  8 continue;
 1261    }
 1262    // the list is terminated by a colon...stop this part of the validation once we hit one.
 1263  20 else if (token.type == COLON) {
 1264  20 return;
 1265    }
 1266    // the list is terminated by a colon. If this isn't one of those, we have an error.
 1267    else {
 1268  0 illegalAddress("Missing ':'", token);
 1269    }
 1270    }
 1271    }
 1272   
 1273   
 1274    /**
 1275    * Parse the local part of an address spec. The local part
 1276    * is a series of "words" separated by ".".
 1277    */
 1278  311 private void validateLocalPart(TokenStream tokens) throws AddressException {
 1279  311 while (true) {
 1280    // get the token.
 1281  389 AddressToken token = tokens.nextRealToken();
 1282   
 1283    // this must be either an atom or a literal.
 1284  389 if (token.type != ATOM && token.type != QUOTED_LITERAL) {
 1285  4 illegalAddress("Invalid local part", token);
 1286    }
 1287   
 1288    // get the next token (white space and comments ignored)
 1289  385 token = tokens.nextRealToken();
 1290    // if this is a period, we continue parsing
 1291  385 if (token.type != PERIOD) {
 1292  307 tokens.pushToken(token);
 1293    // return the token
 1294  307 return;
 1295    }
 1296    }
 1297    }
 1298   
 1299   
 1300   
 1301    /**
 1302    * Parse a domain name of the form sub-domain *("." sub-domain).
 1303    * a sub-domain is either an atom or a domain-literal.
 1304    */
 1305  296 private void validateDomain(TokenStream tokens) throws AddressException {
 1306  296 while (true) {
 1307    // get the token.
 1308  594 AddressToken token = tokens.nextRealToken();
 1309   
 1310    // this must be either an atom or a domain literal.
 1311  594 if (token.type != ATOM && token.type != DOMAIN_LITERAL) {
 1312  0 illegalAddress("Invalid domain", token);
 1313    }
 1314   
 1315    // get the next token (white space is ignored)
 1316  594 token = tokens.nextRealToken();
 1317    // if this is a period, we continue parsing
 1318  594 if (token.type != PERIOD) {
 1319    // return the token
 1320  296 tokens.pushToken(token);
 1321  296 return;
 1322    }
 1323    }
 1324    }
 1325   
 1326    /**
 1327    * Convert a list of word tokens into a phrase string. The
 1328    * rules for this are a little hard to puzzle out, but there
 1329    * is a logic to it. If the list is empty, the phrase is
 1330    * just a null value.
 1331    *
 1332    * If we have a phrase, then the quoted strings need to
 1333    * handled appropriately. In multi-token phrases, the
 1334    * quoted literals are concatenated with the quotes intact,
 1335    * regardless of content. Thus a phrase that comes in like this:
 1336    *
 1337    * "Geronimo" Apache
 1338    *
 1339    * gets converted back to the same string.
 1340    *
 1341    * If there is just a single token in the phrase, AND the token
 1342    * is a quoted string AND the string does not contain embedded
 1343    * special characters ("\.,@<>()[]:;), then the phrase
 1344    * is expressed as an atom. Thus the literal
 1345    *
 1346    * "Geronimo"
 1347    *
 1348    * becomes
 1349    *
 1350    * Geronimo
 1351    *
 1352    * but
 1353    *
 1354    * "(Geronimo)"
 1355    *
 1356    * remains
 1357    *
 1358    * "(Geronimo)"
 1359    *
 1360    * Note that we're generating a canonical form of the phrase,
 1361    * which removes comments and reduces linear whitespace down
 1362    * to a single separator token.
 1363    *
 1364    * @param phrase An array list of phrase tokens (which may be empty).
 1365    */
 1366  44 private String personalToString(TokenStream tokens) {
 1367   
 1368    // no tokens in the stream? This is a null value.
 1369  44 AddressToken token = tokens.nextToken();
 1370   
 1371  44 if (token.type == END_OF_TOKENS) {
 1372  0 return null;
 1373    }
 1374   
 1375  44 AddressToken next = tokens.nextToken();
 1376   
 1377    // single element phrases get special treatment.
 1378  44 if (next.type == END_OF_TOKENS) {
 1379    // this can be used directly...if it contains special characters, quoting will be
 1380    // performed when it's converted to a string value.
 1381  27 return token.value;
 1382    }
 1383   
 1384    // reset to the beginning
 1385  17 tokens.pushToken(token);
 1386   
 1387    // have at least two tokens,
 1388  17 StringBuffer buffer = new StringBuffer();
 1389   
 1390    // get the first token. After the first, we add these as blank delimited values.
 1391  17 token = tokens.nextToken();
 1392  17 addTokenValue(token, buffer);
 1393   
 1394  17 token = tokens.nextToken();
 1395  17 while (token.type != END_OF_TOKENS) {
 1396    // add a blank separator
 1397  18 buffer.append(' ');
 1398    // now add the next tokens value
 1399  18 addTokenValue(token, buffer);
 1400  18 token = tokens.nextToken();
 1401    }
 1402    // and return the canonicalized value
 1403  17 return buffer.toString();
 1404    }
 1405   
 1406   
 1407    /**
 1408    * take a canonicalized set of address tokens and reformat it back into a string value,
 1409    * inserting whitespace where appropriate.
 1410    *
 1411    * @param tokens The set of tokens representing the address.
 1412    *
 1413    * @return The string value of the tokens.
 1414    */
 1415  317 private String addressToString(TokenStream tokens) {
 1416  317 StringBuffer buffer = new StringBuffer();
 1417   
 1418    // this flag controls whether we insert a blank delimiter between tokens as
 1419    // we advance through the list. Blanks are only inserted between consequtive value tokens.
 1420    // Initially, this is false, then we flip it to true whenever we add a value token, and
 1421    // back to false for any special character token.
 1422  317 boolean spaceRequired = false;
 1423   
 1424    // we use nextToken rather than nextRealToken(), since we need to process the comments also.
 1425  317 AddressToken token = tokens.nextToken();
 1426   
 1427    // now add each of the tokens
 1428  317 while (token.type != END_OF_TOKENS) {
 1429  1944 switch (token.type) {
 1430    // the word tokens are the only ones where we need to worry about adding
 1431    // whitespace delimiters.
 1432  1038 case ATOM:
 1433  21 case QUOTED_LITERAL:
 1434    // was the last token also a word? Insert a blank first.
 1435  1059 if (spaceRequired) {
 1436  11 buffer.append(' ');
 1437    }
 1438  1059 addTokenValue(token, buffer);
 1439    // let the next iteration know we just added a word to the list.
 1440  1059 spaceRequired = true;
 1441  1059 break;
 1442   
 1443    // these special characters are just added in. The constants for the character types
 1444    // were carefully selected to be the character value in question. This allows us to
 1445    // just append the value.
 1446  11 case LEFT_ANGLE:
 1447  11 case RIGHT_ANGLE:
 1448  27 case COMMA:
 1449  53 case COLON:
 1450  319 case AT_SIGN:
 1451  31 case SEMICOLON:
 1452  403 case PERIOD:
 1453  855 buffer.append((char)token.type);
 1454    // no spaces around specials
 1455  855 spaceRequired = false;
 1456  855 break;
 1457   
 1458    // Domain literals self delimiting...we can just append them and turn off the space flag.
 1459  30 case DOMAIN_LITERAL:
 1460  30 addTokenValue(token, buffer);
 1461  30 spaceRequired = false;
 1462  30 break;
 1463   
 1464    // Comments are also self delimitin.
 1465  0 case COMMENT:
 1466  0 addTokenValue(token, buffer);
 1467  0 spaceRequired = false;
 1468  0 break;
 1469    }
 1470  1944 token = tokens.nextToken();
 1471    }
 1472  317 return buffer.toString();
 1473    }
 1474   
 1475   
 1476    /**
 1477    * Append a value token on to a string buffer used to create
 1478    * the canonicalized string value.
 1479    *
 1480    * @param token The token we're adding.
 1481    * @param buffer The target string buffer.
 1482    */
 1483  1124 private void addTokenValue(AddressToken token, StringBuffer buffer) {
 1484    // atom values can be added directly.
 1485  1124 if (token.type == ATOM) {
 1486  1064 buffer.append(token.value);
 1487    }
 1488    // a literal value? Add this as a quoted string
 1489  60 else if (token.type == QUOTED_LITERAL) {
 1490  29 buffer.append(formatQuotedString(token.value));
 1491    }
 1492    // could be a domain literal of the form "[value]"
 1493  31 else if (token.type == DOMAIN_LITERAL) {
 1494  30 buffer.append('[');
 1495  30 buffer.append(token.value);
 1496  30 buffer.append(']');
 1497    }
 1498    // comments also have values
 1499  1 else if (token.type == COMMENT) {
 1500  1 buffer.append('(');
 1501  1 buffer.append(token.value);
 1502  1 buffer.append(')');
 1503    }
 1504    }
 1505   
 1506   
 1507   
 1508    private static final byte[] CHARMAP = {
 1509    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x06, 0x02, 0x06, 0x02, 0x02, 0x06, 0x02, 0x02,
 1510    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
 1511    0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00,
 1512    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x00, 0x01, 0x00,
 1513   
 1514    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 1515    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x00, 0x00,
 1516    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 1517    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,
 1518    };
 1519   
 1520    private static final byte FLG_SPECIAL = 1;
 1521    private static final byte FLG_CONTROL = 2;
 1522    private static final byte FLG_SPACE = 4;
 1523   
 1524  0 private static boolean isSpace(char ch) {
 1525  0 if (ch > '\u007f') {
 1526  0 return false;
 1527    } else {
 1528  0 return (CHARMAP[ch] & FLG_SPACE) != 0;
 1529    }
 1530    }
 1531   
 1532    /**
 1533    * Quick test to see if a character is an allowed atom character
 1534    * or not.
 1535    *
 1536    * @param ch The test character.
 1537    *
 1538    * @return true if this character is allowed in atoms, false for any
 1539    * control characters, special characters, or blanks.
 1540    */
 1541  5158 public static boolean isAtom(char ch) {
 1542  5158 if (ch > '\u007f') {
 1543  0 return false;
 1544    }
 1545  5158 else if (ch == ' ') {
 1546  57 return false;
 1547    }
 1548    else {
 1549  5101 return (CHARMAP[ch] & (FLG_SPECIAL | FLG_CONTROL)) == 0;
 1550    }
 1551    }
 1552   
 1553    /**
 1554    * Tests one string to determine if it contains any of the
 1555    * characters in a supplied test string.
 1556    *
 1557    * @param s The string we're testing.
 1558    * @param chars The set of characters we're testing against.
 1559    *
 1560    * @return true if any of the characters is found, false otherwise.
 1561    */
 1562  173 public static boolean containsCharacters(String s, String chars)
 1563    {
 1564  173 for (int i = 0; i < s.length(); i++) {
 1565  2100 if (chars.indexOf(s.charAt(i)) >= 0) {
 1566  62 return true;
 1567    }
 1568    }
 1569  111 return false;
 1570    }
 1571   
 1572   
 1573    /**
 1574    * Tests if a string contains any non-special characters that
 1575    * would require encoding the value as a quoted string rather
 1576    * than a simple atom value.
 1577    *
 1578    * @param s The test string.
 1579    *
 1580    * @return True if the string contains only blanks or allowed atom
 1581    * characters.
 1582    */
 1583  46 public static boolean containsSpecials(String s)
 1584    {
 1585  46 for (int i = 0; i < s.length(); i++) {
 1586  242 char ch = s.charAt(i);
 1587    // must be either a blank or an allowed atom char.
 1588  242 if (ch == ' ' || isAtom(ch)) {
 1589  239 continue;
 1590    }
 1591    else {
 1592  3 return true;
 1593    }
 1594    }
 1595  43 return false;
 1596    }
 1597   
 1598   
 1599    /**
 1600    * Tests if a string contains any non-special characters that
 1601    * would require encoding the value as a quoted string rather
 1602    * than a simple atom value.
 1603    *
 1604    * @param s The test string.
 1605    *
 1606    * @return True if the string contains only blanks or allowed atom
 1607    * characters.
 1608    */
 1609  0 public static boolean isAtom(String s)
 1610    {
 1611  0 for (int i = 0; i < s.length(); i++) {
 1612  0 char ch = s.charAt(i);
 1613    // must be an allowed atom character
 1614  0 if (!isAtom(ch)) {
 1615  0 return false;
 1616    }
 1617    }
 1618  0 return true;
 1619    }
 1620   
 1621    /**
 1622    * Apply RFC822 quoting rules to a literal string value. This
 1623    * will search the string to see if there are any characters that
 1624    * require special escaping, and apply the escapes. If the
 1625    * string is just a string of blank-delimited atoms, the string
 1626    * value is returned without quotes.
 1627    *
 1628    * @param s The source string.
 1629    *
 1630    * @return A version of the string as a valid RFC822 quoted literal.
 1631    */
 1632  63 public static String quoteString(String s) {
 1633   
 1634    // only backslash and double quote require escaping. If the string does not
 1635    // contain any of these, then we can just slap on some quotes and go.
 1636  63 if (s.indexOf('\\') == -1 && s.indexOf('"') == -1) {
 1637    // if the string is an atom (or a series of blank-delimited atoms), we can just return it directly.
 1638  46 if (!containsSpecials(s)) {
 1639  43 return s;
 1640    }
 1641  3 StringBuffer buffer = new StringBuffer(s.length() + 2);
 1642  3 buffer.append('"');
 1643  3 buffer.append(s);
 1644  3 buffer.append('"');
 1645  3 return buffer.toString();
 1646    }
 1647   
 1648    // get a buffer sufficiently large for the string, two quote characters, and a "reasonable"
 1649    // number of escaped values.
 1650  17 StringBuffer buffer = new StringBuffer(s.length() + 10);
 1651  17 buffer.append('"');
 1652   
 1653    // now check all of the characters.
 1654  17 for (int i = 0; i < s.length(); i++) {
 1655  143 char ch = s.charAt(i);
 1656    // character requiring escaping?
 1657  143 if (ch == '\\' || ch == '"') {
 1658    // add an extra backslash
 1659  26 buffer.append('\\');
 1660    }
 1661    // and add on the character
 1662  143 buffer.append(ch);
 1663    }
 1664  17 buffer.append('"');
 1665  17 return buffer.toString();
 1666    }
 1667   
 1668    /**
 1669    * Apply RFC822 quoting rules to a literal string value. This
 1670    * will search the string to see if there are any characters that
 1671    * require special escaping, and apply the escapes. The returned
 1672    * value is enclosed in quotes.
 1673    *
 1674    * @param s The source string.
 1675    *
 1676    * @return A version of the string as a valid RFC822 quoted literal.
 1677    */
 1678  29 public static String formatQuotedString(String s) {
 1679    // only backslash and double quote require escaping. If the string does not
 1680    // contain any of these, then we can just slap on some quotes and go.
 1681  29 if (s.indexOf('\\') == -1 && s.indexOf('"') == -1) {
 1682  29 StringBuffer buffer = new StringBuffer(s.length() + 2);
 1683  29 buffer.append('"');
 1684  29 buffer.append(s);
 1685  29 buffer.append('"');
 1686  29 return buffer.toString();
 1687    }
 1688   
 1689    // get a buffer sufficiently large for the string, two quote characters, and a "reasonable"
 1690    // number of escaped values.
 1691  0 StringBuffer buffer = new StringBuffer(s.length() + 10);
 1692  0 buffer.append('"');
 1693   
 1694    // now check all of the characters.
 1695  0 for (int i = 0; i < s.length(); i++) {
 1696  0 char ch = s.charAt(i);
 1697    // character requiring escaping?
 1698  0 if (ch == '\\' || ch == '"') {
 1699    // add an extra backslash
 1700  0 buffer.append('\\');
 1701    }
 1702    // and add on the character
 1703  0 buffer.append(ch);
 1704    }
 1705  0 buffer.append('"');
 1706  0 return buffer.toString();
 1707    }
 1708   
 1709    public class TokenStream {
 1710    // the set of tokens in the parsed address list, as determined by RFC822 syntax rules.
 1711    private List tokens;
 1712   
 1713    // the current token position
 1714    int currentToken = 0;
 1715   
 1716   
 1717    /**
 1718    * Default constructor for a TokenStream. This creates an
 1719    * empty TokenStream for purposes of tokenizing an address.
 1720    * It is the creator's responsibility to terminate the stream
 1721    * with a terminator token.
 1722    */
 1723  326 public TokenStream() {
 1724  326 tokens = new ArrayList();
 1725    }
 1726   
 1727   
 1728    /**
 1729    * Construct a TokenStream from a list of tokens. A terminator
 1730    * token is added to the end.
 1731    *
 1732    * @param tokens An existing token list.
 1733    */
 1734  387 public TokenStream(List tokens) {
 1735  387 this.tokens = tokens;
 1736  387 tokens.add(new AddressToken(END_OF_TOKENS, -1));
 1737    }
 1738   
 1739    /**
 1740    * Add an address token to the token list.
 1741    *
 1742    * @param t The new token to add to the list.
 1743    */
 1744  2932 public void addToken(AddressToken token) {
 1745  2932 tokens.add(token);
 1746    }
 1747   
 1748    /**
 1749    * Get the next token at the cursor position, advancing the
 1750    * position accordingly.
 1751    *
 1752    * @return The token at the current token position.
 1753    */
 1754  8904 public AddressToken nextToken() {
 1755  8904 AddressToken token = (AddressToken)tokens.get(currentToken++);
 1756    // we skip over white space tokens when operating in this mode, so
 1757    // check the token and iterate until we get a non-white space.
 1758  8904 while (token.type == WHITESPACE) {
 1759  255 token = (AddressToken)tokens.get(currentToken++);
 1760    }
 1761  8904 return token;
 1762    }
 1763   
 1764   
 1765    /**
 1766    * Get the next token at the cursor position, without advancing the
 1767    * position.
 1768    *
 1769    * @return The token at the current token position.
 1770    */
 1771  97 public AddressToken currentToken() {
 1772    // return the current token and step the cursor
 1773  97 return (AddressToken)tokens.get(currentToken);
 1774    }
 1775   
 1776   
 1777    /**
 1778    * Get the next non-comment token from the string. Comments are ignored, except as personal information
 1779    * for very simple address specifications.
 1780    *
 1781    * @return A token guaranteed not to be a whitespace token.
 1782    */
 1783  4335 public AddressToken nextRealToken()
 1784    {
 1785  4335 AddressToken token = nextToken();
 1786  4335 if (token.type == COMMENT) {
 1787  0 token = nextToken();
 1788    }
 1789  4335 return token;
 1790    }
 1791   
 1792    /**
 1793    * Push a token back on to the queue, making the index of this
 1794    * token the current cursor position.
 1795    *
 1796    * @param token The token to push.
 1797    */
 1798  1615 public void pushToken(AddressToken token) {
 1799    // just reset the cursor to the token's index position.
 1800  1615 currentToken = tokenIndex(token);
 1801    }
 1802   
 1803    /**
 1804    * Get the next token after a given token, without advancing the
 1805    * token position.
 1806    *
 1807    * @param token The token we're retrieving a token relative to.
 1808    *
 1809    * @return The next token in the list.
 1810    */
 1811  0 public AddressToken nextToken(AddressToken token) {
 1812  0 return (AddressToken)tokens.get(tokenIndex(token) + 1);
 1813    }
 1814   
 1815   
 1816    /**
 1817    * Return the token prior to a given token.
 1818    *
 1819    * @param token The token used for the index.
 1820    *
 1821    * @return The token prior to the index token in the list.
 1822    */
 1823  241 public AddressToken previousToken(AddressToken token) {
 1824  241 return (AddressToken)tokens.get(tokenIndex(token) - 1);
 1825    }
 1826   
 1827   
 1828    /**
 1829    * Retrieve a token at a given index position.
 1830    *
 1831    * @param index The target index.
 1832    */
 1833  0 public AddressToken getToken(int index)
 1834    {
 1835  0 return (AddressToken)tokens.get(index);
 1836    }
 1837   
 1838   
 1839    /**
 1840    * Retrieve the index of a particular token in the stream.
 1841    *
 1842    * @param token The target token.
 1843    *
 1844    * @return The index of the token within the stream. Returns -1 if this
 1845    * token is somehow not in the stream.
 1846    */
 1847  2630 public int tokenIndex(AddressToken token) {
 1848  2630 return tokens.indexOf(token);
 1849    }
 1850   
 1851   
 1852    /**
 1853    * Extract a new TokenStream running from the start token to the
 1854    * token preceeding the end token.
 1855    *
 1856    * @param start The starting token of the section.
 1857    * @param end The last token (+1) for the target section.
 1858    *
 1859    * @return A new TokenStream object for processing this section of tokens.
 1860    */
 1861  387 public TokenStream section(AddressToken start, AddressToken end) {
 1862  387 int startIndex = tokenIndex(start);
 1863  387 int endIndex = tokenIndex(end);
 1864   
 1865    // List.subList() returns a list backed by the original list. Since we need to add a
 1866    // terminator token to this list when we take the sublist, we need to manually copy the
 1867    // references so we don't end up munging the original list.
 1868  387 ArrayList list = new ArrayList(endIndex - startIndex + 2);
 1869   
 1870  387 for (int i = startIndex; i <= endIndex; i++) {
 1871  2222 list.add(tokens.get(i));
 1872    }
 1873  387 return new TokenStream(list);
 1874    }
 1875   
 1876   
 1877    /**
 1878    * Reset the token position back to the beginning of the
 1879    * stream.
 1880    */
 1881  317 public void reset() {
 1882  317 currentToken = 0;
 1883    }
 1884   
 1885    /**
 1886    * Scan forward looking for a non-blank token.
 1887    *
 1888    * @return The first non-blank token in the stream.
 1889    */
 1890  30 public AddressToken getNonBlank()
 1891    {
 1892  30 AddressToken token = currentToken();
 1893  30 while (token.type == WHITESPACE) {
 1894  0 currentToken++;
 1895  0 token = currentToken();
 1896    }
 1897  30 return token;
 1898    }
 1899   
 1900   
 1901    /**
 1902    * Extract a blank delimited token from a TokenStream. A blank
 1903    * delimited token is the set of tokens up to the next real whitespace
 1904    * token (comments not included).
 1905    *
 1906    * @return A TokenStream object with the new set of tokens.
 1907    */
 1908  30 public TokenStream getBlankDelimitedToken()
 1909    {
 1910    // get the next non-whitespace token.
 1911  30 AddressToken first = getNonBlank();
 1912    // if this is the end, we return null.
 1913  30 if (first.type == END_OF_TOKENS) {
 1914  15 return null;
 1915    }
 1916   
 1917  15 AddressToken last = first;
 1918   
 1919    // the methods for retrieving tokens skip over whitespace, so we're going to process this
 1920    // by index.
 1921  15 currentToken++;
 1922   
 1923  15 AddressToken token = currentToken();
 1924  15 while (true) {
 1925    // if this is our marker, then pluck out the section and return it.
 1926  67 if (token.type == END_OF_TOKENS || token.type == WHITESPACE) {
 1927  15 return section(first, last);
 1928    }
 1929  52 last = token;
 1930  52 currentToken++;
 1931    // we accept any and all tokens here.
 1932  52 token = currentToken();
 1933    }
 1934    }
 1935   
 1936    /**
 1937    * Return the index of the current cursor position.
 1938    *
 1939    * @return The integer index of the current token.
 1940    */
 1941  0 public int currentIndex() {
 1942  0 return currentToken;
 1943    }
 1944   
 1945  0 public void dumpTokens()
 1946    {
 1947  0 System.out.println(">>>>>>>>> Start dumping TokenStream tokens");
 1948  0 for (int i = 0; i < tokens.size(); i++) {
 1949  0 System.out.println("-------- Token: " + tokens.get(i));
 1950    }
 1951   
 1952  0 System.out.println("++++++++ cursor position=" + currentToken);
 1953  0 System.out.println(">>>>>>>>> End dumping TokenStream tokens");
 1954    }
 1955    }
 1956   
 1957   
 1958    /**
 1959    * Simple utility class for representing address tokens.
 1960    */
 1961    public class AddressToken {
 1962   
 1963    // the token type
 1964    int type;
 1965   
 1966    // string value of the token (can be null)
 1967    String value;
 1968   
 1969    // position of the token within the address string.
 1970    int position;
 1971   
 1972  2025 AddressToken(int type, int position)
 1973    {
 1974  2025 this.type = type;
 1975  2025 this.value = null;
 1976  2025 this.position = position;
 1977    }
 1978   
 1979  1294 AddressToken(String value, int type, int position)
 1980    {
 1981  1294 this.type = type;
 1982  1294 this.value = value;
 1983  1294 this.position = position;
 1984    }
 1985   
 1986  0 public String toString()
 1987    {
 1988  0 if (type == END_OF_TOKENS) {
 1989  0 return "AddressToken: type=END_OF_TOKENS";
 1990    }
 1991  0 if (value == null) {
 1992  0 return "AddressToken: type=" + (char)type;
 1993    }
 1994    else {
 1995  0 return "AddressToken: type=" + (char)type + " value=" + value;
 1996    }
 1997    }
 1998    }
 1999    }
 2000