001 /** 002 * 003 * Copyright 2003-2006 The Apache Software Foundation 004 * 005 * Licensed under the Apache License, Version 2.0 (the "License"); 006 * you may not use this file except in compliance with the License. 007 * You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package javax.mail.internet; 019 020 import java.io.BufferedInputStream; 021 import java.io.BufferedReader; 022 import java.io.ByteArrayInputStream; 023 import java.io.ByteArrayOutputStream; 024 import java.io.IOException; 025 import java.io.InputStream; 026 import java.io.InputStreamReader; 027 import java.io.OutputStream; 028 import java.io.UnsupportedEncodingException; 029 import java.util.HashMap; 030 import java.util.Map; 031 import java.util.NoSuchElementException; 032 import java.util.StringTokenizer; 033 034 import javax.activation.DataHandler; 035 import javax.activation.DataSource; 036 import javax.mail.MessagingException; 037 038 import org.apache.geronimo.mail.util.ASCIIUtil; 039 import org.apache.geronimo.mail.util.Base64; 040 import org.apache.geronimo.mail.util.Base64DecoderStream; 041 import org.apache.geronimo.mail.util.Base64Encoder; 042 import org.apache.geronimo.mail.util.Base64EncoderStream; 043 import org.apache.geronimo.mail.util.QuotedPrintableDecoderStream; 044 import org.apache.geronimo.mail.util.QuotedPrintableEncoderStream; 045 import org.apache.geronimo.mail.util.QuotedPrintableEncoder; 046 import org.apache.geronimo.mail.util.QuotedPrintable; 047 import org.apache.geronimo.mail.util.SessionUtil; 048 import org.apache.geronimo.mail.util.UUDecoderStream; 049 import org.apache.geronimo.mail.util.UUEncoderStream; 050 051 // encodings include "base64", "quoted-printable", "7bit", "8bit" and "binary". 052 // In addition, "uuencode" is also supported. The 053 054 /** 055 * @version $Rev: 421852 $ $Date: 2006-07-14 03:02:19 -0700 (Fri, 14 Jul 2006) $ 056 */ 057 public class MimeUtility { 058 059 private static final String MIME_FOLDENCODEDWORDS = "mail.mime.foldencodedwords"; 060 private static final String MIME_DECODE_TEXT_STRICT = "mail.mime.decodetext.strict"; 061 private static final String MIME_FOLDTEXT = "mail.mime.foldtext"; 062 private static final int FOLD_THRESHOLD = 76; 063 064 private MimeUtility() { 065 } 066 067 public static final int ALL = -1; 068 069 private static String defaultJavaCharset; 070 private static String escapedChars = "\"\\\r\n"; 071 private static String linearWhiteSpace = " \t\r\n"; 072 073 private static String QP_WORD_SPECIALS = "=_?\"#$%&'(),.:;<>@[\\]^`{|}~"; 074 private static String QP_TEXT_SPECIALS = "=_?"; 075 076 // the javamail spec includes the ability to map java encoding names to MIME-specified names. Normally, 077 // these values are loaded from a character mapping file. 078 private static Map java2mime; 079 private static Map mime2java; 080 081 static { 082 // we need to load the mapping tables used by javaCharset() and mimeCharset(). 083 loadCharacterSetMappings(); 084 } 085 086 public static InputStream decode(InputStream in, String encoding) throws MessagingException { 087 encoding = encoding.toLowerCase(); 088 089 // some encodies are just pass-throughs, with no real decoding. 090 if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) { 091 return in; 092 } 093 else if (encoding.equals("base64")) { 094 return new Base64DecoderStream(in); 095 } 096 // UUEncode is known by a couple historical extension names too. 097 else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) { 098 return new UUDecoderStream(in); 099 } 100 else if (encoding.equals("quoted-printable")) { 101 return new QuotedPrintableDecoderStream(in); 102 } 103 else { 104 throw new MessagingException("Unknown encoding " + encoding); 105 } 106 } 107 108 /** 109 * Decode a string of text obtained from a mail header into 110 * it's proper form. The text generally will consist of a 111 * string of tokens, some of which may be encoded using 112 * base64 encoding. 113 * 114 * @param text The text to decode. 115 * 116 * @return The decoded test string. 117 * @exception UnsupportedEncodingException 118 */ 119 public static String decodeText(String text) throws UnsupportedEncodingException { 120 // if the text contains any encoded tokens, those tokens will be marked with "=?". If the 121 // source string doesn't contain that sequent, no decoding is required. 122 if (text.indexOf("=?") < 0) { 123 return text; 124 } 125 126 // we have two sets of rules we can apply. 127 if (!SessionUtil.getBooleanProperty(MIME_DECODE_TEXT_STRICT, true)) { 128 return decodeTextNonStrict(text); 129 } 130 131 int offset = 0; 132 int endOffset = text.length(); 133 134 int startWhiteSpace = -1; 135 int endWhiteSpace = -1; 136 137 StringBuffer decodedText = new StringBuffer(text.length()); 138 139 boolean previousTokenEncoded = false; 140 141 while (offset < endOffset) { 142 char ch = text.charAt(offset); 143 144 // is this a whitespace character? 145 if (linearWhiteSpace.indexOf(ch) != -1) { 146 startWhiteSpace = offset; 147 while (offset < endOffset) { 148 // step over the white space characters. 149 ch = text.charAt(offset); 150 if (linearWhiteSpace.indexOf(ch) != -1) { 151 offset++; 152 } 153 else { 154 // record the location of the first non lwsp and drop down to process the 155 // token characters. 156 endWhiteSpace = offset; 157 break; 158 } 159 } 160 } 161 else { 162 // we have a word token. We need to scan over the word and then try to parse it. 163 int wordStart = offset; 164 165 while (offset < endOffset) { 166 // step over the white space characters. 167 ch = text.charAt(offset); 168 if (linearWhiteSpace.indexOf(ch) == -1) { 169 offset++; 170 } 171 else { 172 break; 173 } 174 175 //NB: Trailing whitespace on these header strings will just be discarded. 176 } 177 // pull out the word token. 178 String word = text.substring(wordStart, offset); 179 // is the token encoded? decode the word 180 if (word.startsWith("=?")) { 181 try { 182 // if this gives a parsing failure, treat it like a non-encoded word. 183 String decodedWord = decodeWord(word); 184 185 // are any whitespace characters significant? Append 'em if we've got 'em. 186 if (!previousTokenEncoded) { 187 if (startWhiteSpace != -1) { 188 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace)); 189 startWhiteSpace = -1; 190 } 191 } 192 // this is definitely a decoded token. 193 previousTokenEncoded = true; 194 // and add this to the text. 195 decodedText.append(decodedWord); 196 // we continue parsing from here...we allow parsing errors to fall through 197 // and get handled as normal text. 198 continue; 199 200 } catch (ParseException e) { 201 } 202 } 203 // this is a normal token, so it doesn't matter what the previous token was. Add the white space 204 // if we have it. 205 if (startWhiteSpace != -1) { 206 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace)); 207 startWhiteSpace = -1; 208 } 209 // this is not a decoded token. 210 previousTokenEncoded = false; 211 decodedText.append(word); 212 } 213 } 214 215 return decodedText.toString(); 216 } 217 218 219 /** 220 * Decode a string of text obtained from a mail header into 221 * it's proper form. The text generally will consist of a 222 * string of tokens, some of which may be encoded using 223 * base64 encoding. This is for non-strict decoded for mailers that 224 * violate the RFC 2047 restriction that decoded tokens must be delimited 225 * by linear white space. This will scan tokens looking for inner tokens 226 * enclosed in "=?" -- "?=" pairs. 227 * 228 * @param text The text to decode. 229 * 230 * @return The decoded test string. 231 * @exception UnsupportedEncodingException 232 */ 233 private static String decodeTextNonStrict(String text) throws UnsupportedEncodingException { 234 int offset = 0; 235 int endOffset = text.length(); 236 237 int startWhiteSpace = -1; 238 int endWhiteSpace = -1; 239 240 StringBuffer decodedText = new StringBuffer(text.length()); 241 242 boolean previousTokenEncoded = false; 243 244 while (offset < endOffset) { 245 char ch = text.charAt(offset); 246 247 // is this a whitespace character? 248 if (linearWhiteSpace.indexOf(ch) != -1) { 249 startWhiteSpace = offset; 250 while (offset < endOffset) { 251 // step over the white space characters. 252 ch = text.charAt(offset); 253 if (linearWhiteSpace.indexOf(ch) != -1) { 254 offset++; 255 } 256 else { 257 // record the location of the first non lwsp and drop down to process the 258 // token characters. 259 endWhiteSpace = offset; 260 break; 261 } 262 } 263 } 264 else { 265 // we're at the start of a word token. We potentially need to break this up into subtokens 266 int wordStart = offset; 267 268 while (offset < endOffset) { 269 // step over the white space characters. 270 ch = text.charAt(offset); 271 if (linearWhiteSpace.indexOf(ch) == -1) { 272 offset++; 273 } 274 else { 275 break; 276 } 277 278 //NB: Trailing whitespace on these header strings will just be discarded. 279 } 280 // pull out the word token. 281 String word = text.substring(wordStart, offset); 282 283 int decodeStart = 0; 284 285 // now scan and process each of the bits within here. 286 while (decodeStart < word.length()) { 287 int tokenStart = word.indexOf("=?", decodeStart); 288 if (tokenStart == -1) { 289 // this is a normal token, so it doesn't matter what the previous token was. Add the white space 290 // if we have it. 291 if (startWhiteSpace != -1) { 292 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace)); 293 startWhiteSpace = -1; 294 } 295 // this is not a decoded token. 296 previousTokenEncoded = false; 297 decodedText.append(word.substring(decodeStart)); 298 // we're finished. 299 break; 300 } 301 // we have something to process 302 else { 303 // we might have a normal token preceeding this. 304 if (tokenStart != decodeStart) { 305 // this is a normal token, so it doesn't matter what the previous token was. Add the white space 306 // if we have it. 307 if (startWhiteSpace != -1) { 308 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace)); 309 startWhiteSpace = -1; 310 } 311 // this is not a decoded token. 312 previousTokenEncoded = false; 313 decodedText.append(word.substring(decodeStart, tokenStart)); 314 } 315 316 // now find the end marker. 317 int tokenEnd = word.indexOf("?=", tokenStart); 318 // sigh, an invalid token. Treat this as plain text. 319 if (tokenEnd == -1) { 320 // this is a normal token, so it doesn't matter what the previous token was. Add the white space 321 // if we have it. 322 if (startWhiteSpace != -1) { 323 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace)); 324 startWhiteSpace = -1; 325 } 326 // this is not a decoded token. 327 previousTokenEncoded = false; 328 decodedText.append(word.substring(tokenStart)); 329 // we're finished. 330 break; 331 } 332 else { 333 // update our ticker 334 decodeStart = tokenEnd + 2; 335 336 String token = word.substring(tokenStart, tokenEnd); 337 try { 338 // if this gives a parsing failure, treat it like a non-encoded word. 339 String decodedWord = decodeWord(token); 340 341 // are any whitespace characters significant? Append 'em if we've got 'em. 342 if (!previousTokenEncoded) { 343 if (startWhiteSpace != -1) { 344 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace)); 345 startWhiteSpace = -1; 346 } 347 } 348 // this is definitely a decoded token. 349 previousTokenEncoded = true; 350 // and add this to the text. 351 decodedText.append(decodedWord); 352 // we continue parsing from here...we allow parsing errors to fall through 353 // and get handled as normal text. 354 continue; 355 356 } catch (ParseException e) { 357 } 358 // this is a normal token, so it doesn't matter what the previous token was. Add the white space 359 // if we have it. 360 if (startWhiteSpace != -1) { 361 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace)); 362 startWhiteSpace = -1; 363 } 364 // this is not a decoded token. 365 previousTokenEncoded = false; 366 decodedText.append(token); 367 } 368 } 369 } 370 } 371 } 372 373 return decodedText.toString(); 374 } 375 376 /** 377 * Parse a string using the RFC 2047 rules for an "encoded-word" 378 * type. This encoding has the syntax: 379 * 380 * encoded-word = "=?" charset "?" encoding "?" encoded-text "?=" 381 * 382 * @param word The possibly encoded word value. 383 * 384 * @return The decoded word. 385 * @exception ParseException 386 * @exception UnsupportedEncodingException 387 */ 388 public static String decodeWord(String word) throws ParseException, UnsupportedEncodingException { 389 // encoded words start with the characters "=?". If this not an encoded word, we throw a 390 // ParseException for the caller. 391 392 if (!word.startsWith("=?")) { 393 throw new ParseException("Invalid RFC 2047 encoded-word: " + word); 394 } 395 396 int charsetPos = word.indexOf('?', 2); 397 if (charsetPos == -1) { 398 throw new ParseException("Missing charset in RFC 2047 encoded-word: " + word); 399 } 400 401 // pull out the character set information (this is the MIME name at this point). 402 String charset = word.substring(2, charsetPos).toLowerCase(); 403 404 // now pull out the encoding token the same way. 405 int encodingPos = word.indexOf('?', charsetPos + 1); 406 if (encodingPos == -1) { 407 throw new ParseException("Missing encoding in RFC 2047 encoded-word: " + word); 408 } 409 410 String encoding = word.substring(charsetPos + 1, encodingPos); 411 412 // and finally the encoded text. 413 int encodedTextPos = word.indexOf("?=", encodingPos + 1); 414 if (encodedTextPos == -1) { 415 throw new ParseException("Missing encoded text in RFC 2047 encoded-word: " + word); 416 } 417 418 String encodedText = word.substring(encodingPos + 1, encodedTextPos); 419 420 // seems a bit silly to encode a null string, but easy to deal with. 421 if (encodedText.length() == 0) { 422 return ""; 423 } 424 425 try { 426 // the decoder writes directly to an output stream. 427 ByteArrayOutputStream out = new ByteArrayOutputStream(encodedText.length()); 428 429 byte[] encodedData = encodedText.getBytes("US-ASCII"); 430 431 // Base64 encoded? 432 if (encoding.equals("B")) { 433 Base64.decode(encodedData, out); 434 } 435 // maybe quoted printable. 436 else if (encoding.equals("Q")) { 437 QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder(); 438 dataEncoder.decodeWord(encodedData, out); 439 } 440 else { 441 throw new UnsupportedEncodingException("Unknown RFC 2047 encoding: " + encoding); 442 } 443 // get the decoded byte data and convert into a string. 444 byte[] decodedData = out.toByteArray(); 445 return new String(decodedData, javaCharset(charset)); 446 } catch (IOException e) { 447 throw new UnsupportedEncodingException("Invalid RFC 2047 encoding"); 448 } 449 450 } 451 452 /** 453 * Wrap an encoder around a given output stream. 454 * 455 * @param out The output stream to wrap. 456 * @param encoding The name of the encoding. 457 * 458 * @return A instance of FilterOutputStream that manages on the fly 459 * encoding for the requested encoding type. 460 * @exception MessagingException 461 */ 462 public static OutputStream encode(OutputStream out, String encoding) throws MessagingException { 463 // no encoding specified, so assume it goes out unchanged. 464 if (encoding == null) { 465 return out; 466 } 467 468 encoding = encoding.toLowerCase(); 469 470 // some encodies are just pass-throughs, with no real decoding. 471 if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) { 472 return out; 473 } 474 else if (encoding.equals("base64")) { 475 return new Base64EncoderStream(out); 476 } 477 // UUEncode is known by a couple historical extension names too. 478 else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) { 479 return new UUEncoderStream(out); 480 } 481 else if (encoding.equals("quoted-printable")) { 482 return new QuotedPrintableEncoderStream(out); 483 } 484 else { 485 throw new MessagingException("Unknown encoding " + encoding); 486 } 487 } 488 489 /** 490 * Wrap an encoder around a given output stream. 491 * 492 * @param out The output stream to wrap. 493 * @param encoding The name of the encoding. 494 * @param filename The filename of the data being sent (only used for UUEncode). 495 * 496 * @return A instance of FilterOutputStream that manages on the fly 497 * encoding for the requested encoding type. 498 * @exception MessagingException 499 */ 500 public static OutputStream encode(OutputStream out, String encoding, String filename) throws MessagingException { 501 encoding = encoding.toLowerCase(); 502 503 // some encodies are just pass-throughs, with no real decoding. 504 if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) { 505 return out; 506 } 507 else if (encoding.equals("base64")) { 508 return new Base64EncoderStream(out); 509 } 510 // UUEncode is known by a couple historical extension names too. 511 else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) { 512 return new UUEncoderStream(out, filename); 513 } 514 else if (encoding.equals("quoted-printable")) { 515 return new QuotedPrintableEncoderStream(out); 516 } 517 else { 518 throw new MessagingException("Unknown encoding " + encoding); 519 } 520 } 521 522 523 public static String encodeText(String word) throws UnsupportedEncodingException { 524 return encodeText(word, null, null); 525 } 526 527 public static String encodeText(String word, String charset, String encoding) throws UnsupportedEncodingException { 528 return encodeWord(word, charset, encoding, false); 529 } 530 531 public static String encodeWord(String word) throws UnsupportedEncodingException { 532 return encodeWord(word, null, null); 533 } 534 535 public static String encodeWord(String word, String charset, String encoding) throws UnsupportedEncodingException { 536 return encodeWord(word, charset, encoding, true); 537 } 538 539 540 private static String encodeWord(String word, String charset, String encoding, boolean encodingWord) throws UnsupportedEncodingException { 541 542 // figure out what we need to encode this. 543 String encoder = ASCIIUtil.getTextTransferEncoding(word); 544 // all ascii? We can return this directly, 545 if (encoder.equals("7bit")) { 546 return word; 547 } 548 549 // if not given a charset, use the default. 550 if (charset == null) { 551 charset = getDefaultMIMECharset(); 552 } 553 554 // sort out the encoder. If not explicitly given, use the best guess we've already established. 555 if (encoding != null) { 556 if (encoding.equalsIgnoreCase("B")) { 557 encoder = "base64"; 558 } 559 else if (encoding.equalsIgnoreCase("Q")) { 560 encoder = "quoted-printable"; 561 } 562 else { 563 throw new UnsupportedEncodingException("Unknown transfer encoding: " + encoding); 564 } 565 } 566 567 try { 568 // get the string bytes in the correct source charset 569 InputStream in = new ByteArrayInputStream(word.getBytes( javaCharset(charset))); 570 ByteArrayOutputStream out = new ByteArrayOutputStream(); 571 572 if (encoder.equals("base64")) { 573 Base64Encoder dataEncoder = new Base64Encoder(); 574 dataEncoder.encodeWord(in, charset, out, SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false)); 575 } 576 else { 577 QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder(); 578 dataEncoder.encodeWord(in, charset, encodingWord ? QP_WORD_SPECIALS : QP_TEXT_SPECIALS, out, SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false)); 579 } 580 581 byte[] bytes = out.toByteArray(); 582 return new String(bytes); 583 } catch (IOException e) { 584 throw new UnsupportedEncodingException("Invalid encoding"); 585 } 586 } 587 588 589 /** 590 * Examine the content of a data source and decide what type 591 * of transfer encoding should be used. For text streams, 592 * we'll decided between 7bit, quoted-printable, and base64. 593 * For binary content types, we'll use either 7bit or base64. 594 * 595 * @param handler The DataHandler associated with the content. 596 * 597 * @return The string name of an encoding used to transfer the content. 598 */ 599 public static String getEncoding(DataHandler handler) { 600 601 602 // if this handler has an associated data source, we can read directly from the 603 // data source to make this judgment. This is generally MUCH faster than asking the 604 // DataHandler to write out the data for us. 605 DataSource ds = handler.getDataSource(); 606 if (ds != null) { 607 return getEncoding(ds); 608 } 609 610 try { 611 // get a parser that allows us to make comparisons. 612 ContentType content = new ContentType(ds.getContentType()); 613 614 // The only access to the content bytes at this point is by asking the handler to write 615 // the information out to a stream. We're going to pipe this through a special stream 616 // that examines the bytes as they go by. 617 ContentCheckingOutputStream checker = new ContentCheckingOutputStream(); 618 619 handler.writeTo(checker); 620 621 // figure this out based on whether we believe this to be a text type or not. 622 if (content.match("text/*")) { 623 return checker.getTextTransferEncoding(); 624 } 625 else { 626 return checker.getBinaryTransferEncoding(); 627 } 628 629 } catch (Exception e) { 630 // any unexpected I/O exceptions we'll force to a "safe" fallback position. 631 return "base64"; 632 } 633 } 634 635 636 /** 637 * Determine the what transfer encoding should be used for 638 * data retrieved from a DataSource. 639 * 640 * @param source The DataSource for the transmitted data. 641 * 642 * @return The string name of the encoding form that should be used for 643 * the data. 644 */ 645 public static String getEncoding(DataSource source) { 646 InputStream in = null; 647 648 try { 649 // get a parser that allows us to make comparisons. 650 ContentType content = new ContentType(source.getContentType()); 651 652 // we're probably going to have to scan the data. 653 in = source.getInputStream(); 654 655 if (!content.match("text/*")) { 656 // Not purporting to be a text type? Examine the content to see we might be able to 657 // at least pretend it is an ascii type. 658 return ASCIIUtil.getBinaryTransferEncoding(in); 659 } 660 else { 661 return ASCIIUtil.getTextTransferEncoding(in); 662 } 663 } catch (Exception e) { 664 // this was a problem...not sure what makes sense here, so we'll assume it's binary 665 // and we need to transfer this using Base64 encoding. 666 return "base64"; 667 } finally { 668 // make sure we close the stream 669 try { 670 if (in != null) { 671 in.close(); 672 } 673 } catch (IOException e) { 674 } 675 } 676 } 677 678 679 /** 680 * Quote a "word" value. If the word contains any character from 681 * the specified "specials" list, this value is returned as a 682 * quoted strong. Otherwise, it is returned unchanged (an "atom"). 683 * 684 * @param word The word requiring quoting. 685 * @param specials The set of special characters that can't appear in an unquoted 686 * string. 687 * 688 * @return The quoted value. This will be unchanged if the word doesn't contain 689 * any of the designated special characters. 690 */ 691 public static String quote(String word, String specials) { 692 int wordLength = word.length(); 693 boolean requiresQuoting = false; 694 // scan the string looking for problem characters 695 for (int i =0; i < wordLength; i++) { 696 char ch = word.charAt(i); 697 // special escaped characters require escaping, which also implies quoting. 698 if (escapedChars.indexOf(ch) >= 0) { 699 return quoteAndEscapeString(word); 700 } 701 // now check for control characters or the designated special characters. 702 if (ch < 32 || ch >= 127 || specials.indexOf(ch) >= 0) { 703 // we know this requires quoting, but we still need to scan the entire string to 704 // see if contains chars that require escaping. Just go ahead and treat it as if it does. 705 return quoteAndEscapeString(word); 706 } 707 } 708 return word; 709 } 710 711 /** 712 * Take a string and return it as a formatted quoted string, with 713 * all characters requiring escaping handled properly. 714 * 715 * @param word The string to quote. 716 * 717 * @return The quoted string. 718 */ 719 private static String quoteAndEscapeString(String word) { 720 int wordLength = word.length(); 721 // allocate at least enough for the string and two quotes plus a reasonable number of escaped chars. 722 StringBuffer buffer = new StringBuffer(wordLength + 10); 723 // add the leading quote. 724 buffer.append('"'); 725 726 for (int i = 0; i < wordLength; i++) { 727 char ch = word.charAt(i); 728 // is this an escaped char? 729 if (escapedChars.indexOf(ch) >= 0) { 730 // add the escape marker before appending. 731 buffer.append('\\'); 732 } 733 buffer.append(ch); 734 } 735 // now the closing quote 736 buffer.append('"'); 737 return buffer.toString(); 738 } 739 740 /** 741 * Translate a MIME standard character set name into the Java 742 * equivalent. 743 * 744 * @param charset The MIME standard name. 745 * 746 * @return The Java equivalent for this name. 747 */ 748 public static String javaCharset(String charset) { 749 // nothing in, nothing out. 750 if (charset == null) { 751 return null; 752 } 753 754 String mappedCharset = (String)mime2java.get(charset.toLowerCase()); 755 // if there is no mapping, then the original name is used. Many of the MIME character set 756 // names map directly back into Java. The reverse isn't necessarily true. 757 return mappedCharset == null ? charset : mappedCharset; 758 } 759 760 /** 761 * Map a Java character set name into the MIME equivalent. 762 * 763 * @param charset The java character set name. 764 * 765 * @return The MIME standard equivalent for this character set name. 766 */ 767 public static String mimeCharset(String charset) { 768 // nothing in, nothing out. 769 if (charset == null) { 770 return null; 771 } 772 773 String mappedCharset = (String)java2mime.get(charset.toLowerCase()); 774 // if there is no mapping, then the original name is used. Many of the MIME character set 775 // names map directly back into Java. The reverse isn't necessarily true. 776 return mappedCharset == null ? charset : mappedCharset; 777 } 778 779 780 /** 781 * Get the default character set to use, in Java name format. 782 * This either be the value set with the mail.mime.charset 783 * system property or obtained from the file.encoding system 784 * property. If neither of these is set, we fall back to 785 * 8859_1 (basically US-ASCII). 786 * 787 * @return The character string value of the default character set. 788 */ 789 public static String getDefaultJavaCharset() { 790 String charset = SessionUtil.getProperty("mail.mime.charset"); 791 if (charset != null) { 792 return javaCharset(charset); 793 } 794 return SessionUtil.getProperty("file.encoding", "8859_1"); 795 } 796 797 /** 798 * Get the default character set to use, in MIME name format. 799 * This either be the value set with the mail.mime.charset 800 * system property or obtained from the file.encoding system 801 * property. If neither of these is set, we fall back to 802 * 8859_1 (basically US-ASCII). 803 * 804 * @return The character string value of the default character set. 805 */ 806 static String getDefaultMIMECharset() { 807 // if the property is specified, this can be used directly. 808 String charset = SessionUtil.getProperty("mail.mime.charset"); 809 if (charset != null) { 810 return charset; 811 } 812 813 // get the Java-defined default and map back to a MIME name. 814 return mimeCharset(SessionUtil.getProperty("file.encoding", "8859_1")); 815 } 816 817 818 /** 819 * Load the default mapping tables used by the javaCharset() 820 * and mimeCharset() methods. By default, these tables are 821 * loaded from the /META-INF/javamail.charset.map file. If 822 * something goes wrong loading that file, we configure things 823 * with a default mapping table (which just happens to mimic 824 * what's in the default mapping file). 825 */ 826 static private void loadCharacterSetMappings() { 827 java2mime = new HashMap(); 828 mime2java = new HashMap(); 829 830 831 // normally, these come from a character map file contained in the jar file. 832 try { 833 InputStream map = javax.mail.internet.MimeUtility.class.getResourceAsStream("/META-INF/javamail.charset.map"); 834 835 if (map != null) { 836 // get a reader for this so we can load. 837 BufferedReader reader = new BufferedReader(new InputStreamReader(map)); 838 839 readMappings(reader, java2mime); 840 readMappings(reader, mime2java); 841 } 842 } catch (Exception e) { 843 } 844 845 // if any sort of error occurred reading the preferred file version, we could end up with empty 846 // mapping tables. This could cause all sorts of difficulty, so ensure they are populated with at 847 // least a reasonable set of defaults. 848 849 // these mappings echo what's in the default file. 850 if (java2mime.isEmpty()) { 851 java2mime.put("8859_1", "ISO-8859-1"); 852 java2mime.put("iso8859_1", "ISO-8859-1"); 853 java2mime.put("iso8859-1", "ISO-8859-1"); 854 855 java2mime.put("8859_2", "ISO-8859-2"); 856 java2mime.put("iso8859_2", "ISO-8859-2"); 857 java2mime.put("iso8859-2", "ISO-8859-2"); 858 859 java2mime.put("8859_3", "ISO-8859-3"); 860 java2mime.put("iso8859_3", "ISO-8859-3"); 861 java2mime.put("iso8859-3", "ISO-8859-3"); 862 863 java2mime.put("8859_4", "ISO-8859-4"); 864 java2mime.put("iso8859_4", "ISO-8859-4"); 865 java2mime.put("iso8859-4", "ISO-8859-4"); 866 867 java2mime.put("8859_5", "ISO-8859-5"); 868 java2mime.put("iso8859_5", "ISO-8859-5"); 869 java2mime.put("iso8859-5", "ISO-8859-5"); 870 871 java2mime.put ("8859_6", "ISO-8859-6"); 872 java2mime.put("iso8859_6", "ISO-8859-6"); 873 java2mime.put("iso8859-6", "ISO-8859-6"); 874 875 java2mime.put("8859_7", "ISO-8859-7"); 876 java2mime.put("iso8859_7", "ISO-8859-7"); 877 java2mime.put("iso8859-7", "ISO-8859-7"); 878 879 java2mime.put("8859_8", "ISO-8859-8"); 880 java2mime.put("iso8859_8", "ISO-8859-8"); 881 java2mime.put("iso8859-8", "ISO-8859-8"); 882 883 java2mime.put("8859_9", "ISO-8859-9"); 884 java2mime.put("iso8859_9", "ISO-8859-9"); 885 java2mime.put("iso8859-9", "ISO-8859-9"); 886 887 java2mime.put("sjis", "Shift_JIS"); 888 java2mime.put ("jis", "ISO-2022-JP"); 889 java2mime.put("iso2022jp", "ISO-2022-JP"); 890 java2mime.put("euc_jp", "euc-jp"); 891 java2mime.put("koi8_r", "koi8-r"); 892 java2mime.put("euc_cn", "euc-cn"); 893 java2mime.put("euc_tw", "euc-tw"); 894 java2mime.put("euc_kr", "euc-kr"); 895 } 896 897 if (mime2java.isEmpty ()) { 898 mime2java.put("iso-2022-cn", "ISO2022CN"); 899 mime2java.put("iso-2022-kr", "ISO2022KR"); 900 mime2java.put("utf-8", "UTF8"); 901 mime2java.put("utf8", "UTF8"); 902 mime2java.put("ja_jp.iso2022-7", "ISO2022JP"); 903 mime2java.put("ja_jp.eucjp", "EUCJIS"); 904 mime2java.put ("euc-kr", "KSC5601"); 905 mime2java.put("euckr", "KSC5601"); 906 mime2java.put("us-ascii", "ISO-8859-1"); 907 mime2java.put("x-us-ascii", "ISO-8859-1"); 908 } 909 } 910 911 912 /** 913 * Read a section of a character map table and populate the 914 * target mapping table with the information. The table end 915 * is marked by a line starting with "--" and also ending with 916 * "--". Blank lines and comment lines (beginning with '#') are 917 * ignored. 918 * 919 * @param reader The source of the file information. 920 * @param table The mapping table used to store the information. 921 */ 922 static private void readMappings(BufferedReader reader, Map table) throws IOException { 923 // process lines to the EOF or the end of table marker. 924 while (true) { 925 String line = reader.readLine(); 926 // no line returned is an EOF 927 if (line == null) { 928 return; 929 } 930 931 // trim so we're not messed up by trailing blanks 932 line = line.trim(); 933 934 if (line.length() == 0 || line.startsWith("#")) { 935 continue; 936 } 937 938 // stop processing if this is the end-of-table marker. 939 if (line.startsWith("--") && line.endsWith("--")) { 940 return; 941 } 942 943 // we allow either blanks or tabs as token delimiters. 944 StringTokenizer tokenizer = new StringTokenizer(line, " \t"); 945 946 try { 947 String from = tokenizer.nextToken().toLowerCase(); 948 String to = tokenizer.nextToken(); 949 950 table.put(from, to); 951 } catch (NoSuchElementException e) { 952 // just ignore the line if invalid. 953 } 954 } 955 } 956 957 958 /** 959 * Perform RFC 2047 text folding on a string of text. 960 * 961 * @param used The amount of text already "used up" on this line. This is 962 * typically the length of a message header that this text 963 * get getting added to. 964 * @param s The text to fold. 965 * 966 * @return The input text, with linebreaks inserted at appropriate fold points. 967 */ 968 public static String fold(int used, String s) { 969 // if folding is disable, unfolding is also. Return the string unchanged. 970 if (!SessionUtil.getBooleanProperty(MIME_FOLDTEXT, true)) { 971 return s; 972 } 973 974 int end; 975 976 // now we need to strip off any trailing "whitespace", where whitespace is blanks, tabs, 977 // and line break characters. 978 for (end = s.length() - 1; end >= 0; end--) { 979 int ch = s.charAt(end); 980 if (ch != ' ' && ch != '\t' ) { 981 break; 982 } 983 } 984 985 // did we actually find something to remove? Shorten the String to the trimmed length 986 if (end != s.length() - 1) { 987 s = s.substring(0, end + 1); 988 } 989 990 // does the string as it exists now not require folding? We can just had that back right off. 991 if (s.length() + used <= FOLD_THRESHOLD) { 992 return s; 993 } 994 995 // get a buffer for the length of the string, plus room for a few line breaks. 996 // these are soft line breaks, so we generally need more that just the line breaks (an escape + 997 // CR + LF + leading space on next line); 998 StringBuffer newString = new StringBuffer(s.length() + 8); 999 1000 1001 // now keep chopping this down until we've accomplished what we need. 1002 while (used + s.length() > FOLD_THRESHOLD) { 1003 int breakPoint = -1; 1004 char breakChar = 0; 1005 1006 // now scan for the next place where we can break. 1007 for (int i = 0; i < s.length(); i++) { 1008 // have we passed the fold limit? 1009 if (used + i > FOLD_THRESHOLD) { 1010 // if we've already seen a blank, then stop now. Otherwise 1011 // we keep going until we hit a fold point. 1012 if (breakPoint != -1) { 1013 break; 1014 } 1015 } 1016 char ch = s.charAt(i); 1017 1018 // a white space character? 1019 if (ch == ' ' || ch == '\t') { 1020 // this might be a run of white space, so skip over those now. 1021 breakPoint = i; 1022 // we need to maintain the same character type after the inserted linebreak. 1023 breakChar = ch; 1024 i++; 1025 while (i < s.length()) { 1026 ch = s.charAt(i); 1027 if (ch != ' ' && ch != '\t') { 1028 break; 1029 } 1030 i++; 1031 } 1032 } 1033 // found an embedded new line. Escape this so that the unfolding process preserves it. 1034 else if (ch == '\n') { 1035 newString.append('\\'); 1036 newString.append('\n'); 1037 } 1038 else if (ch == '\r') { 1039 newString.append('\\'); 1040 newString.append('\n'); 1041 i++; 1042 // if this is a CRLF pair, add the second char also 1043 if (i < s.length() && s.charAt(i) == '\n') { 1044 newString.append('\r'); 1045 } 1046 } 1047 1048 } 1049 // no fold point found, we punt, append the remainder and leave. 1050 if (breakPoint == -1) { 1051 newString.append(s); 1052 return newString.toString(); 1053 } 1054 newString.append(s.substring(0, breakPoint)); 1055 newString.append("\r\n"); 1056 newString.append(breakChar); 1057 // chop the string 1058 s = s.substring(breakPoint + 1); 1059 // start again, and we've used the first char of the limit already with the whitespace char. 1060 used = 1; 1061 } 1062 1063 // add on the remainder, and return 1064 newString.append(s); 1065 return newString.toString(); 1066 } 1067 1068 /** 1069 * Unfold a folded string. The unfolding process will remove 1070 * any line breaks that are not escaped and which are also followed 1071 * by whitespace characters. 1072 * 1073 * @param s The folded string. 1074 * 1075 * @return A new string with unfolding rules applied. 1076 */ 1077 public static String unfold(String s) { 1078 // if folding is disable, unfolding is also. Return the string unchanged. 1079 if (!SessionUtil.getBooleanProperty(MIME_FOLDTEXT, true)) { 1080 return s; 1081 } 1082 1083 // if there are no line break characters in the string, we can just return this. 1084 if (s.indexOf('\n') < 0 && s.indexOf('\r') < 0) { 1085 return s; 1086 } 1087 1088 // we need to scan and fix things up. 1089 int length = s.length(); 1090 1091 StringBuffer newString = new StringBuffer(length); 1092 1093 // scan the entire string 1094 for (int i = 0; i < length; i++) { 1095 char ch = s.charAt(i); 1096 1097 // we have a backslash. In folded strings, escape characters are only processed as such if 1098 // they preceed line breaks. Otherwise, we leave it be. 1099 if (ch == '\\') { 1100 // escape at the very end? Just add the character. 1101 if (i == length - 1) { 1102 newString.append(ch); 1103 } 1104 else { 1105 int nextChar = s.charAt(i + 1); 1106 1107 // naked newline? Add the new line to the buffer, and skip the escape char. 1108 if (nextChar == '\n') { 1109 newString.append('\n'); 1110 i++; 1111 } 1112 else if (nextChar == '\r') { 1113 // just the CR left? Add it, removing the escape. 1114 if (i == length - 2 || s.charAt(i + 2) != '\r') { 1115 newString.append('\r'); 1116 i++; 1117 } 1118 else { 1119 // toss the escape, add both parts of the CRLF, and skip over two chars. 1120 newString.append('\r'); 1121 newString.append('\n'); 1122 i += 2; 1123 } 1124 } 1125 else { 1126 // an escape for another purpose, just copy it over. 1127 newString.append(ch); 1128 } 1129 } 1130 } 1131 // we have an unescaped line break 1132 else if (ch == '\n' || ch == '\r') { 1133 // remember the position in case we need to backtrack. 1134 int lineBreak = i; 1135 boolean CRLF = false; 1136 1137 if (ch == '\r') { 1138 // check to see if we need to step over this. 1139 if (i < length - 1 && s.charAt(i + 1) == '\n') { 1140 i++; 1141 // flag the type so we know what we might need to preserve. 1142 CRLF = true; 1143 } 1144 } 1145 1146 // get a temp position scanner. 1147 int scan = i + 1; 1148 1149 // does a blank follow this new line? we need to scrap the new line and reduce the leading blanks 1150 // down to a single blank. 1151 if (scan < length && s.charAt(scan) == ' ') { 1152 // add the character 1153 newString.append(' '); 1154 1155 // scan over the rest of the blanks 1156 i = scan + 1; 1157 while (i < length && s.charAt(i) == ' ') { 1158 i++; 1159 } 1160 // we'll increment down below, so back up to the last blank as the current char. 1161 i--; 1162 } 1163 else { 1164 // we must keep this line break. Append the appropriate style. 1165 if (CRLF) { 1166 newString.append("\r\n"); 1167 } 1168 else { 1169 newString.append(ch); 1170 } 1171 } 1172 } 1173 else { 1174 // just a normal, ordinary character 1175 newString.append(ch); 1176 } 1177 } 1178 return newString.toString(); 1179 } 1180 } 1181 1182 1183 /** 1184 * Utility class for examining content information written out 1185 * by a DataHandler object. This stream gathers statistics on 1186 * the stream so it can make transfer encoding determinations. 1187 */ 1188 class ContentCheckingOutputStream extends OutputStream { 1189 private int asciiChars = 0; 1190 private int nonAsciiChars = 0; 1191 private boolean containsLongLines = false; 1192 private boolean containsMalformedEOL = false; 1193 private int previousChar = 0; 1194 private int span = 0; 1195 1196 ContentCheckingOutputStream() { 1197 } 1198 1199 public void write(byte[] data) throws IOException { 1200 write(data, 0, data.length); 1201 } 1202 1203 public void write(byte[] data, int offset, int length) throws IOException { 1204 for (int i = 0; i < length; i++) { 1205 write(data[offset + i]); 1206 } 1207 } 1208 1209 public void write(int ch) { 1210 // we found a linebreak. Reset the line length counters on either one. We don't 1211 // really need to validate here. 1212 if (ch == '\n' || ch == '\r') { 1213 // we found a newline, this is only valid if the previous char was the '\r' 1214 if (ch == '\n') { 1215 // malformed linebreak? force this to base64 encoding. 1216 if (previousChar != '\r') { 1217 containsMalformedEOL = true; 1218 } 1219 } 1220 // hit a line end, reset our line length counter 1221 span = 0; 1222 } 1223 else { 1224 span++; 1225 // the text has long lines, we can't transfer this as unencoded text. 1226 if (span > 998) { 1227 containsLongLines = true; 1228 } 1229 1230 // non-ascii character, we have to transfer this in binary. 1231 if (!ASCIIUtil.isAscii(ch)) { 1232 nonAsciiChars++; 1233 } 1234 else { 1235 asciiChars++; 1236 } 1237 } 1238 previousChar = ch; 1239 } 1240 1241 1242 public String getBinaryTransferEncoding() { 1243 if (nonAsciiChars != 0 || containsLongLines || containsMalformedEOL) { 1244 return "base64"; 1245 } 1246 else { 1247 return "7bit"; 1248 } 1249 } 1250 1251 public String getTextTransferEncoding() { 1252 // looking good so far, only valid chars here. 1253 if (nonAsciiChars == 0) { 1254 // does this contain long text lines? We need to use a Q-P encoding which will 1255 // be only slightly longer, but handles folding the longer lines. 1256 if (containsLongLines) { 1257 return "quoted-printable"; 1258 } 1259 else { 1260 // ideal! Easiest one to handle. 1261 return "7bit"; 1262 } 1263 } 1264 else { 1265 // mostly characters requiring encoding? Base64 is our best bet. 1266 if (nonAsciiChars > asciiChars) { 1267 return "base64"; 1268 } 1269 else { 1270 // Q-P encoding will use fewer bytes than the full Base64. 1271 return "quoted-printable"; 1272 } 1273 } 1274 } 1275 }