001 /** 002 * 003 * Copyright 2003-2004 The Apache Software Foundation 004 * 005 * Licensed under the Apache License, Version 2.0 (the "License"); 006 * you may not use this file except in compliance with the License. 007 * You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package javax.mail.internet; 019 020 import java.io.BufferedInputStream; 021 import java.io.BufferedReader; 022 import java.io.ByteArrayInputStream; 023 import java.io.ByteArrayOutputStream; 024 import java.io.IOException; 025 import java.io.InputStream; 026 import java.io.InputStreamReader; 027 import java.io.OutputStream; 028 import java.io.UnsupportedEncodingException; 029 import java.util.HashMap; 030 import java.util.Map; 031 import java.util.NoSuchElementException; 032 import java.util.StringTokenizer; 033 034 import javax.activation.DataHandler; 035 import javax.activation.DataSource; 036 import javax.mail.MessagingException; 037 038 import org.apache.geronimo.mail.util.ASCIIUtil; 039 import org.apache.geronimo.mail.util.Base64; 040 import org.apache.geronimo.mail.util.Base64DecoderStream; 041 import org.apache.geronimo.mail.util.Base64Encoder; 042 import org.apache.geronimo.mail.util.Base64EncoderStream; 043 import org.apache.geronimo.mail.util.QuotedPrintableDecoderStream; 044 import org.apache.geronimo.mail.util.QuotedPrintableEncoderStream; 045 import org.apache.geronimo.mail.util.QuotedPrintableEncoder; 046 import org.apache.geronimo.mail.util.QuotedPrintable; 047 import org.apache.geronimo.mail.util.SessionUtil; 048 import org.apache.geronimo.mail.util.UUDecoderStream; 049 import org.apache.geronimo.mail.util.UUEncoderStream; 050 051 // encodings include "base64", "quoted-printable", "7bit", "8bit" and "binary". 052 // In addition, "uuencode" is also supported. The 053 054 /** 055 * @version $Rev: 412426 $ $Date: 2006-06-07 08:21:46 -0700 (Wed, 07 Jun 2006) $ 056 */ 057 public class MimeUtility { 058 059 private static final String MIME_FOLDENCODEDWORDS = "mail.mime.foldencodedwords"; 060 private static final String MIME_DECODE_TEXT_STRICT = "mail.mime.decodetext.strict"; 061 062 private MimeUtility() { 063 } 064 065 public static final int ALL = -1; 066 067 private static String defaultJavaCharset; 068 private static String escapedChars = "\"\\\r\n"; 069 private static String linearWhiteSpace = " \t\r\n"; 070 071 private static String QP_WORD_SPECIALS = "=_?\"#$%&'(),.:;<>@[\\]^`{|}~"; 072 private static String QP_TEXT_SPECIALS = "=_?"; 073 074 // the javamail spec includes the ability to map java encoding names to MIME-specified names. Normally, 075 // these values are loaded from a character mapping file. 076 private static Map java2mime; 077 private static Map mime2java; 078 079 static { 080 // we need to load the mapping tables used by javaCharset() and mimeCharset(). 081 loadCharacterSetMappings(); 082 } 083 084 public static InputStream decode(InputStream in, String encoding) throws MessagingException { 085 encoding = encoding.toLowerCase(); 086 087 // some encodies are just pass-throughs, with no real decoding. 088 if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) { 089 return in; 090 } 091 else if (encoding.equals("base64")) { 092 return new Base64DecoderStream(in); 093 } 094 // UUEncode is known by a couple historical extension names too. 095 else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) { 096 return new UUDecoderStream(in); 097 } 098 else if (encoding.equals("quoted-printable")) { 099 return new QuotedPrintableDecoderStream(in); 100 } 101 else { 102 throw new MessagingException("Unknown encoding " + encoding); 103 } 104 } 105 106 /** 107 * Decode a string of text obtained from a mail header into 108 * it's proper form. The text generally will consist of a 109 * string of tokens, some of which may be encoded using 110 * base64 encoding. 111 * 112 * @param text The text to decode. 113 * 114 * @return The decoded test string. 115 * @exception UnsupportedEncodingException 116 */ 117 public static String decodeText(String text) throws UnsupportedEncodingException { 118 // if the text contains any encoded tokens, those tokens will be marked with "=?". If the 119 // source string doesn't contain that sequent, no decoding is required. 120 if (text.indexOf("=?") < 0) { 121 return text; 122 } 123 124 // we have two sets of rules we can apply. 125 if (!SessionUtil.getBooleanProperty(MIME_DECODE_TEXT_STRICT, true)) { 126 return decodeTextNonStrict(text); 127 } 128 129 int offset = 0; 130 int endOffset = text.length(); 131 132 int startWhiteSpace = -1; 133 int endWhiteSpace = -1; 134 135 StringBuffer decodedText = new StringBuffer(text.length()); 136 137 boolean previousTokenEncoded = false; 138 139 while (offset < endOffset) { 140 char ch = text.charAt(offset); 141 142 // is this a whitespace character? 143 if (linearWhiteSpace.indexOf(ch) != -1) { 144 startWhiteSpace = offset; 145 while (offset < endOffset) { 146 // step over the white space characters. 147 ch = text.charAt(offset); 148 if (linearWhiteSpace.indexOf(ch) != -1) { 149 offset++; 150 } 151 else { 152 // record the location of the first non lwsp and drop down to process the 153 // token characters. 154 endWhiteSpace = offset; 155 break; 156 } 157 } 158 } 159 else { 160 // we have a word token. We need to scan over the word and then try to parse it. 161 int wordStart = offset; 162 163 while (offset < endOffset) { 164 // step over the white space characters. 165 ch = text.charAt(offset); 166 if (linearWhiteSpace.indexOf(ch) == -1) { 167 offset++; 168 } 169 else { 170 break; 171 } 172 173 //NB: Trailing whitespace on these header strings will just be discarded. 174 } 175 // pull out the word token. 176 String word = text.substring(wordStart, offset); 177 // is the token encoded? decode the word 178 if (word.startsWith("=?")) { 179 try { 180 // if this gives a parsing failure, treat it like a non-encoded word. 181 String decodedWord = decodeWord(word); 182 183 // are any whitespace characters significant? Append 'em if we've got 'em. 184 if (!previousTokenEncoded) { 185 if (startWhiteSpace != -1) { 186 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace)); 187 startWhiteSpace = -1; 188 } 189 } 190 // this is definitely a decoded token. 191 previousTokenEncoded = true; 192 // and add this to the text. 193 decodedText.append(decodedWord); 194 // we continue parsing from here...we allow parsing errors to fall through 195 // and get handled as normal text. 196 continue; 197 198 } catch (ParseException e) { 199 } 200 } 201 // this is a normal token, so it doesn't matter what the previous token was. Add the white space 202 // if we have it. 203 if (startWhiteSpace != -1) { 204 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace)); 205 startWhiteSpace = -1; 206 } 207 // this is not a decoded token. 208 previousTokenEncoded = false; 209 decodedText.append(word); 210 } 211 } 212 213 return decodedText.toString(); 214 } 215 216 217 /** 218 * Decode a string of text obtained from a mail header into 219 * it's proper form. The text generally will consist of a 220 * string of tokens, some of which may be encoded using 221 * base64 encoding. This is for non-strict decoded for mailers that 222 * violate the RFC 2047 restriction that decoded tokens must be delimited 223 * by linear white space. This will scan tokens looking for inner tokens 224 * enclosed in "=?" -- "?=" pairs. 225 * 226 * @param text The text to decode. 227 * 228 * @return The decoded test string. 229 * @exception UnsupportedEncodingException 230 */ 231 private static String decodeTextNonStrict(String text) throws UnsupportedEncodingException { 232 int offset = 0; 233 int endOffset = text.length(); 234 235 int startWhiteSpace = -1; 236 int endWhiteSpace = -1; 237 238 StringBuffer decodedText = new StringBuffer(text.length()); 239 240 boolean previousTokenEncoded = false; 241 242 while (offset < endOffset) { 243 char ch = text.charAt(offset); 244 245 // is this a whitespace character? 246 if (linearWhiteSpace.indexOf(ch) != -1) { 247 startWhiteSpace = offset; 248 while (offset < endOffset) { 249 // step over the white space characters. 250 ch = text.charAt(offset); 251 if (linearWhiteSpace.indexOf(ch) != -1) { 252 offset++; 253 } 254 else { 255 // record the location of the first non lwsp and drop down to process the 256 // token characters. 257 endWhiteSpace = offset; 258 break; 259 } 260 } 261 } 262 else { 263 // we're at the start of a word token. We potentially need to break this up into subtokens 264 int wordStart = offset; 265 266 while (offset < endOffset) { 267 // step over the white space characters. 268 ch = text.charAt(offset); 269 if (linearWhiteSpace.indexOf(ch) == -1) { 270 offset++; 271 } 272 else { 273 break; 274 } 275 276 //NB: Trailing whitespace on these header strings will just be discarded. 277 } 278 // pull out the word token. 279 String word = text.substring(wordStart, offset); 280 281 int decodeStart = 0; 282 283 // now scan and process each of the bits within here. 284 while (decodeStart < word.length()) { 285 int tokenStart = word.indexOf("=?", decodeStart); 286 if (tokenStart == -1) { 287 // this is a normal token, so it doesn't matter what the previous token was. Add the white space 288 // if we have it. 289 if (startWhiteSpace != -1) { 290 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace)); 291 startWhiteSpace = -1; 292 } 293 // this is not a decoded token. 294 previousTokenEncoded = false; 295 decodedText.append(word.substring(decodeStart)); 296 // we're finished. 297 break; 298 } 299 // we have something to process 300 else { 301 // we might have a normal token preceeding this. 302 if (tokenStart != decodeStart) { 303 // this is a normal token, so it doesn't matter what the previous token was. Add the white space 304 // if we have it. 305 if (startWhiteSpace != -1) { 306 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace)); 307 startWhiteSpace = -1; 308 } 309 // this is not a decoded token. 310 previousTokenEncoded = false; 311 decodedText.append(word.substring(decodeStart, tokenStart)); 312 } 313 314 // now find the end marker. 315 int tokenEnd = word.indexOf("?=", tokenStart); 316 // sigh, an invalid token. Treat this as plain text. 317 if (tokenEnd == -1) { 318 // this is a normal token, so it doesn't matter what the previous token was. Add the white space 319 // if we have it. 320 if (startWhiteSpace != -1) { 321 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace)); 322 startWhiteSpace = -1; 323 } 324 // this is not a decoded token. 325 previousTokenEncoded = false; 326 decodedText.append(word.substring(tokenStart)); 327 // we're finished. 328 break; 329 } 330 else { 331 // update our ticker 332 decodeStart = tokenEnd + 2; 333 334 String token = word.substring(tokenStart, tokenEnd); 335 try { 336 // if this gives a parsing failure, treat it like a non-encoded word. 337 String decodedWord = decodeWord(token); 338 339 // are any whitespace characters significant? Append 'em if we've got 'em. 340 if (!previousTokenEncoded) { 341 if (startWhiteSpace != -1) { 342 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace)); 343 startWhiteSpace = -1; 344 } 345 } 346 // this is definitely a decoded token. 347 previousTokenEncoded = true; 348 // and add this to the text. 349 decodedText.append(decodedWord); 350 // we continue parsing from here...we allow parsing errors to fall through 351 // and get handled as normal text. 352 continue; 353 354 } catch (ParseException e) { 355 } 356 // this is a normal token, so it doesn't matter what the previous token was. Add the white space 357 // if we have it. 358 if (startWhiteSpace != -1) { 359 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace)); 360 startWhiteSpace = -1; 361 } 362 // this is not a decoded token. 363 previousTokenEncoded = false; 364 decodedText.append(token); 365 } 366 } 367 } 368 } 369 } 370 371 return decodedText.toString(); 372 } 373 374 /** 375 * Parse a string using the RFC 2047 rules for an "encoded-word" 376 * type. This encoding has the syntax: 377 * 378 * encoded-word = "=?" charset "?" encoding "?" encoded-text "?=" 379 * 380 * @param word The possibly encoded word value. 381 * 382 * @return The decoded word. 383 * @exception ParseException 384 * @exception UnsupportedEncodingException 385 */ 386 public static String decodeWord(String word) throws ParseException, UnsupportedEncodingException { 387 // encoded words start with the characters "=?". If this not an encoded word, we throw a 388 // ParseException for the caller. 389 390 if (!word.startsWith("=?")) { 391 throw new ParseException("Invalid RFC 2047 encoded-word: " + word); 392 } 393 394 int charsetPos = word.indexOf('?', 2); 395 if (charsetPos == -1) { 396 throw new ParseException("Missing charset in RFC 2047 encoded-word: " + word); 397 } 398 399 // pull out the character set information (this is the MIME name at this point). 400 String charset = word.substring(2, charsetPos).toLowerCase(); 401 402 // now pull out the encoding token the same way. 403 int encodingPos = word.indexOf('?', charsetPos + 1); 404 if (encodingPos == -1) { 405 throw new ParseException("Missing encoding in RFC 2047 encoded-word: " + word); 406 } 407 408 String encoding = word.substring(charsetPos + 1, encodingPos); 409 410 // and finally the encoded text. 411 int encodedTextPos = word.indexOf("?=", encodingPos + 1); 412 if (encodedTextPos == -1) { 413 throw new ParseException("Missing encoded text in RFC 2047 encoded-word: " + word); 414 } 415 416 String encodedText = word.substring(encodingPos + 1, encodedTextPos); 417 418 // seems a bit silly to encode a null string, but easy to deal with. 419 if (encodedText.length() == 0) { 420 return ""; 421 } 422 423 try { 424 // the decoder writes directly to an output stream. 425 ByteArrayOutputStream out = new ByteArrayOutputStream(encodedText.length()); 426 427 byte[] encodedData = encodedText.getBytes("US-ASCII"); 428 429 // Base64 encoded? 430 if (encoding.equals("B")) { 431 Base64.decode(encodedData, out); 432 } 433 // maybe quoted printable. 434 else if (encoding.equals("Q")) { 435 QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder(); 436 dataEncoder.decodeWord(encodedData, out); 437 } 438 else { 439 throw new UnsupportedEncodingException("Unknown RFC 2047 encoding: " + encoding); 440 } 441 // get the decoded byte data and convert into a string. 442 byte[] decodedData = out.toByteArray(); 443 return new String(decodedData, javaCharset(charset)); 444 } catch (IOException e) { 445 throw new UnsupportedEncodingException("Invalid RFC 2047 encoding"); 446 } 447 448 } 449 450 /** 451 * Wrap an encoder around a given output stream. 452 * 453 * @param out The output stream to wrap. 454 * @param encoding The name of the encoding. 455 * 456 * @return A instance of FilterOutputStream that manages on the fly 457 * encoding for the requested encoding type. 458 * @exception MessagingException 459 */ 460 public static OutputStream encode(OutputStream out, String encoding) throws MessagingException { 461 // no encoding specified, so assume it goes out unchanged. 462 if (encoding == null) { 463 return out; 464 } 465 466 encoding = encoding.toLowerCase(); 467 468 // some encodies are just pass-throughs, with no real decoding. 469 if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) { 470 return out; 471 } 472 else if (encoding.equals("base64")) { 473 return new Base64EncoderStream(out); 474 } 475 // UUEncode is known by a couple historical extension names too. 476 else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) { 477 return new UUEncoderStream(out); 478 } 479 else if (encoding.equals("quoted-printable")) { 480 return new QuotedPrintableEncoderStream(out); 481 } 482 else { 483 throw new MessagingException("Unknown encoding " + encoding); 484 } 485 } 486 487 /** 488 * Wrap an encoder around a given output stream. 489 * 490 * @param out The output stream to wrap. 491 * @param encoding The name of the encoding. 492 * @param filename The filename of the data being sent (only used for UUEncode). 493 * 494 * @return A instance of FilterOutputStream that manages on the fly 495 * encoding for the requested encoding type. 496 * @exception MessagingException 497 */ 498 public static OutputStream encode(OutputStream out, String encoding, String filename) throws MessagingException { 499 encoding = encoding.toLowerCase(); 500 501 // some encodies are just pass-throughs, with no real decoding. 502 if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) { 503 return out; 504 } 505 else if (encoding.equals("base64")) { 506 return new Base64EncoderStream(out); 507 } 508 // UUEncode is known by a couple historical extension names too. 509 else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) { 510 return new UUEncoderStream(out, filename); 511 } 512 else if (encoding.equals("quoted-printable")) { 513 return new QuotedPrintableEncoderStream(out); 514 } 515 else { 516 throw new MessagingException("Unknown encoding " + encoding); 517 } 518 } 519 520 521 public static String encodeText(String word) throws UnsupportedEncodingException { 522 return encodeText(word, null, null); 523 } 524 525 public static String encodeText(String word, String charset, String encoding) throws UnsupportedEncodingException { 526 return encodeWord(word, charset, encoding, false); 527 } 528 529 public static String encodeWord(String word) throws UnsupportedEncodingException { 530 return encodeWord(word, null, null); 531 } 532 533 public static String encodeWord(String word, String charset, String encoding) throws UnsupportedEncodingException { 534 return encodeWord(word, charset, encoding, true); 535 } 536 537 538 private static String encodeWord(String word, String charset, String encoding, boolean encodingWord) throws UnsupportedEncodingException { 539 540 // figure out what we need to encode this. 541 String encoder = ASCIIUtil.getTextTransferEncoding(word); 542 // all ascii? We can return this directly, 543 if (encoder.equals("7bit")) { 544 return word; 545 } 546 547 // if not given a charset, use the default. 548 if (charset == null) { 549 charset = getDefaultMIMECharset(); 550 } 551 552 // sort out the encoder. If not explicitly given, use the best guess we've already established. 553 if (encoding != null) { 554 if (encoding.equalsIgnoreCase("B")) { 555 encoder = "base64"; 556 } 557 else if (encoding.equalsIgnoreCase("Q")) { 558 encoder = "quoted-printable"; 559 } 560 else { 561 throw new UnsupportedEncodingException("Unknown transfer encoding: " + encoding); 562 } 563 } 564 565 try { 566 // get the string bytes in the correct source charset 567 InputStream in = new ByteArrayInputStream(word.getBytes( javaCharset(charset))); 568 ByteArrayOutputStream out = new ByteArrayOutputStream(); 569 570 if (encoder.equals("base64")) { 571 Base64Encoder dataEncoder = new Base64Encoder(); 572 dataEncoder.encodeWord(in, charset, out, SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false)); 573 } 574 else { 575 QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder(); 576 dataEncoder.encodeWord(in, charset, encodingWord ? QP_WORD_SPECIALS : QP_TEXT_SPECIALS, out, SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false)); 577 } 578 579 byte[] bytes = out.toByteArray(); 580 return new String(bytes); 581 } catch (IOException e) { 582 throw new UnsupportedEncodingException("Invalid encoding"); 583 } 584 } 585 586 587 /** 588 * Examine the content of a data source and decide what type 589 * of transfer encoding should be used. For text streams, 590 * we'll decided between 7bit, quoted-printable, and base64. 591 * For binary content types, we'll use either 7bit or base64. 592 * 593 * @param handler The DataHandler associated with the content. 594 * 595 * @return The string name of an encoding used to transfer the content. 596 */ 597 public static String getEncoding(DataHandler handler) { 598 599 600 // if this handler has an associated data source, we can read directly from the 601 // data source to make this judgment. This is generally MUCH faster than asking the 602 // DataHandler to write out the data for us. 603 DataSource ds = handler.getDataSource(); 604 if (ds != null) { 605 return getEncoding(ds); 606 } 607 608 try { 609 // get a parser that allows us to make comparisons. 610 ContentType content = new ContentType(ds.getContentType()); 611 612 // The only access to the content bytes at this point is by asking the handler to write 613 // the information out to a stream. We're going to pipe this through a special stream 614 // that examines the bytes as they go by. 615 ContentCheckingOutputStream checker = new ContentCheckingOutputStream(); 616 617 handler.writeTo(checker); 618 619 // figure this out based on whether we believe this to be a text type or not. 620 if (content.match("text/*")) { 621 return checker.getTextTransferEncoding(); 622 } 623 else { 624 return checker.getBinaryTransferEncoding(); 625 } 626 627 } catch (Exception e) { 628 // any unexpected I/O exceptions we'll force to a "safe" fallback position. 629 return "base64"; 630 } 631 } 632 633 634 /** 635 * Determine the what transfer encoding should be used for 636 * data retrieved from a DataSource. 637 * 638 * @param source The DataSource for the transmitted data. 639 * 640 * @return The string name of the encoding form that should be used for 641 * the data. 642 */ 643 public static String getEncoding(DataSource source) { 644 InputStream in = null; 645 646 try { 647 // get a parser that allows us to make comparisons. 648 ContentType content = new ContentType(source.getContentType()); 649 650 // we're probably going to have to scan the data. 651 in = source.getInputStream(); 652 653 if (!content.match("text/*")) { 654 // Not purporting to be a text type? Examine the content to see we might be able to 655 // at least pretend it is an ascii type. 656 return ASCIIUtil.getBinaryTransferEncoding(in); 657 } 658 else { 659 return ASCIIUtil.getTextTransferEncoding(in); 660 } 661 } catch (Exception e) { 662 // this was a problem...not sure what makes sense here, so we'll assume it's binary 663 // and we need to transfer this using Base64 encoding. 664 return "base64"; 665 } finally { 666 // make sure we close the stream 667 try { 668 if (in != null) { 669 in.close(); 670 } 671 } catch (IOException e) { 672 } 673 } 674 } 675 676 677 /** 678 * Quote a "word" value. If the word contains any character from 679 * the specified "specials" list, this value is returned as a 680 * quoted strong. Otherwise, it is returned unchanged (an "atom"). 681 * 682 * @param word The word requiring quoting. 683 * @param specials The set of special characters that can't appear in an unquoted 684 * string. 685 * 686 * @return The quoted value. This will be unchanged if the word doesn't contain 687 * any of the designated special characters. 688 */ 689 public static String quote(String word, String specials) { 690 int wordLength = word.length(); 691 boolean requiresQuoting = false; 692 // scan the string looking for problem characters 693 for (int i =0; i < wordLength; i++) { 694 char ch = word.charAt(i); 695 // special escaped characters require escaping, which also implies quoting. 696 if (escapedChars.indexOf(ch) >= 0) { 697 return quoteAndEscapeString(word); 698 } 699 // now check for control characters or the designated special characters. 700 if (ch < 32 || ch >= 127 || specials.indexOf(ch) >= 0) { 701 // we know this requires quoting, but we still need to scan the entire string to 702 // see if contains chars that require escaping. Just go ahead and treat it as if it does. 703 return quoteAndEscapeString(word); 704 } 705 } 706 return word; 707 } 708 709 /** 710 * Take a string and return it as a formatted quoted string, with 711 * all characters requiring escaping handled properly. 712 * 713 * @param word The string to quote. 714 * 715 * @return The quoted string. 716 */ 717 private static String quoteAndEscapeString(String word) { 718 int wordLength = word.length(); 719 // allocate at least enough for the string and two quotes plus a reasonable number of escaped chars. 720 StringBuffer buffer = new StringBuffer(wordLength + 10); 721 // add the leading quote. 722 buffer.append('"'); 723 724 for (int i = 0; i < wordLength; i++) { 725 char ch = word.charAt(i); 726 // is this an escaped char? 727 if (escapedChars.indexOf(ch) >= 0) { 728 // add the escape marker before appending. 729 buffer.append('\\'); 730 } 731 buffer.append(ch); 732 } 733 // now the closing quote 734 buffer.append('"'); 735 return buffer.toString(); 736 } 737 738 /** 739 * Translate a MIME standard character set name into the Java 740 * equivalent. 741 * 742 * @param charset The MIME standard name. 743 * 744 * @return The Java equivalent for this name. 745 */ 746 public static String javaCharset(String charset) { 747 // nothing in, nothing out. 748 if (charset == null) { 749 return null; 750 } 751 752 String mappedCharset = (String)mime2java.get(charset.toLowerCase()); 753 // if there is no mapping, then the original name is used. Many of the MIME character set 754 // names map directly back into Java. The reverse isn't necessarily true. 755 return mappedCharset == null ? charset : mappedCharset; 756 } 757 758 /** 759 * Map a Java character set name into the MIME equivalent. 760 * 761 * @param charset The java character set name. 762 * 763 * @return The MIME standard equivalent for this character set name. 764 */ 765 public static String mimeCharset(String charset) { 766 // nothing in, nothing out. 767 if (charset == null) { 768 return null; 769 } 770 771 String mappedCharset = (String)java2mime.get(charset.toLowerCase()); 772 // if there is no mapping, then the original name is used. Many of the MIME character set 773 // names map directly back into Java. The reverse isn't necessarily true. 774 return mappedCharset == null ? charset : mappedCharset; 775 } 776 777 778 /** 779 * Get the default character set to use, in Java name format. 780 * This either be the value set with the mail.mime.charset 781 * system property or obtained from the file.encoding system 782 * property. If neither of these is set, we fall back to 783 * 8859_1 (basically US-ASCII). 784 * 785 * @return The character string value of the default character set. 786 */ 787 public static String getDefaultJavaCharset() { 788 String charset = SessionUtil.getProperty("mail.mime.charset"); 789 if (charset != null) { 790 return javaCharset(charset); 791 } 792 return SessionUtil.getProperty("file.encoding", "8859_1"); 793 } 794 795 /** 796 * Get the default character set to use, in MIME name format. 797 * This either be the value set with the mail.mime.charset 798 * system property or obtained from the file.encoding system 799 * property. If neither of these is set, we fall back to 800 * 8859_1 (basically US-ASCII). 801 * 802 * @return The character string value of the default character set. 803 */ 804 static String getDefaultMIMECharset() { 805 // if the property is specified, this can be used directly. 806 String charset = SessionUtil.getProperty("mail.mime.charset"); 807 if (charset != null) { 808 return charset; 809 } 810 811 // get the Java-defined default and map back to a MIME name. 812 return mimeCharset(SessionUtil.getProperty("file.encoding", "8859_1")); 813 } 814 815 816 /** 817 * Load the default mapping tables used by the javaCharset() 818 * and mimeCharset() methods. By default, these tables are 819 * loaded from the /META-INF/javamail.charset.map file. If 820 * something goes wrong loading that file, we configure things 821 * with a default mapping table (which just happens to mimic 822 * what's in the default mapping file). 823 */ 824 static private void loadCharacterSetMappings() { 825 java2mime = new HashMap(); 826 mime2java = new HashMap(); 827 828 829 // normally, these come from a character map file contained in the jar file. 830 try { 831 InputStream map = javax.mail.internet.MimeUtility.class.getResourceAsStream("/META-INF/javamail.charset.map"); 832 833 if (map != null) { 834 // get a reader for this so we can load. 835 BufferedReader reader = new BufferedReader(new InputStreamReader(map)); 836 837 readMappings(reader, java2mime); 838 readMappings(reader, mime2java); 839 } 840 } catch (Exception e) { 841 } 842 843 // if any sort of error occurred reading the preferred file version, we could end up with empty 844 // mapping tables. This could cause all sorts of difficulty, so ensure they are populated with at 845 // least a reasonable set of defaults. 846 847 // these mappings echo what's in the default file. 848 if (java2mime.isEmpty()) { 849 java2mime.put("8859_1", "ISO-8859-1"); 850 java2mime.put("iso8859_1", "ISO-8859-1"); 851 java2mime.put("iso8859-1", "ISO-8859-1"); 852 853 java2mime.put("8859_2", "ISO-8859-2"); 854 java2mime.put("iso8859_2", "ISO-8859-2"); 855 java2mime.put("iso8859-2", "ISO-8859-2"); 856 857 java2mime.put("8859_3", "ISO-8859-3"); 858 java2mime.put("iso8859_3", "ISO-8859-3"); 859 java2mime.put("iso8859-3", "ISO-8859-3"); 860 861 java2mime.put("8859_4", "ISO-8859-4"); 862 java2mime.put("iso8859_4", "ISO-8859-4"); 863 java2mime.put("iso8859-4", "ISO-8859-4"); 864 865 java2mime.put("8859_5", "ISO-8859-5"); 866 java2mime.put("iso8859_5", "ISO-8859-5"); 867 java2mime.put("iso8859-5", "ISO-8859-5"); 868 869 java2mime.put ("8859_6", "ISO-8859-6"); 870 java2mime.put("iso8859_6", "ISO-8859-6"); 871 java2mime.put("iso8859-6", "ISO-8859-6"); 872 873 java2mime.put("8859_7", "ISO-8859-7"); 874 java2mime.put("iso8859_7", "ISO-8859-7"); 875 java2mime.put("iso8859-7", "ISO-8859-7"); 876 877 java2mime.put("8859_8", "ISO-8859-8"); 878 java2mime.put("iso8859_8", "ISO-8859-8"); 879 java2mime.put("iso8859-8", "ISO-8859-8"); 880 881 java2mime.put("8859_9", "ISO-8859-9"); 882 java2mime.put("iso8859_9", "ISO-8859-9"); 883 java2mime.put("iso8859-9", "ISO-8859-9"); 884 885 java2mime.put("sjis", "Shift_JIS"); 886 java2mime.put ("jis", "ISO-2022-JP"); 887 java2mime.put("iso2022jp", "ISO-2022-JP"); 888 java2mime.put("euc_jp", "euc-jp"); 889 java2mime.put("koi8_r", "koi8-r"); 890 java2mime.put("euc_cn", "euc-cn"); 891 java2mime.put("euc_tw", "euc-tw"); 892 java2mime.put("euc_kr", "euc-kr"); 893 } 894 895 if (mime2java.isEmpty ()) { 896 mime2java.put("iso-2022-cn", "ISO2022CN"); 897 mime2java.put("iso-2022-kr", "ISO2022KR"); 898 mime2java.put("utf-8", "UTF8"); 899 mime2java.put("utf8", "UTF8"); 900 mime2java.put("ja_jp.iso2022-7", "ISO2022JP"); 901 mime2java.put("ja_jp.eucjp", "EUCJIS"); 902 mime2java.put ("euc-kr", "KSC5601"); 903 mime2java.put("euckr", "KSC5601"); 904 mime2java.put("us-ascii", "ISO-8859-1"); 905 mime2java.put("x-us-ascii", "ISO-8859-1"); 906 } 907 } 908 909 910 /** 911 * Read a section of a character map table and populate the 912 * target mapping table with the information. The table end 913 * is marked by a line starting with "--" and also ending with 914 * "--". Blank lines and comment lines (beginning with '#') are 915 * ignored. 916 * 917 * @param reader The source of the file information. 918 * @param table The mapping table used to store the information. 919 */ 920 static private void readMappings(BufferedReader reader, Map table) throws IOException { 921 // process lines to the EOF or the end of table marker. 922 while (true) { 923 String line = reader.readLine(); 924 // no line returned is an EOF 925 if (line == null) { 926 return; 927 } 928 929 // trim so we're not messed up by trailing blanks 930 line = line.trim(); 931 932 if (line.length() == 0 || line.startsWith("#")) { 933 continue; 934 } 935 936 // stop processing if this is the end-of-table marker. 937 if (line.startsWith("--") && line.endsWith("--")) { 938 return; 939 } 940 941 // we allow either blanks or tabs as token delimiters. 942 StringTokenizer tokenizer = new StringTokenizer(line, " \t"); 943 944 try { 945 String from = tokenizer.nextToken().toLowerCase(); 946 String to = tokenizer.nextToken(); 947 948 table.put(from, to); 949 } catch (NoSuchElementException e) { 950 // just ignore the line if invalid. 951 } 952 } 953 } 954 955 956 } 957 958 959 /** 960 * Utility class for examining content information written out 961 * by a DataHandler object. This stream gathers statistics on 962 * the stream so it can make transfer encoding determinations. 963 */ 964 class ContentCheckingOutputStream extends OutputStream { 965 private int asciiChars = 0; 966 private int nonAsciiChars = 0; 967 private boolean containsLongLines = false; 968 private boolean containsMalformedEOL = false; 969 private int previousChar = 0; 970 private int span = 0; 971 972 ContentCheckingOutputStream() { 973 } 974 975 public void write(byte[] data) throws IOException { 976 write(data, 0, data.length); 977 } 978 979 public void write(byte[] data, int offset, int length) throws IOException { 980 for (int i = 0; i < length; i++) { 981 write(data[offset + i]); 982 } 983 } 984 985 public void write(int ch) { 986 // we found a linebreak. Reset the line length counters on either one. We don't 987 // really need to validate here. 988 if (ch == '\n' || ch == '\r') { 989 // we found a newline, this is only valid if the previous char was the '\r' 990 if (ch == '\n') { 991 // malformed linebreak? force this to base64 encoding. 992 if (previousChar != '\r') { 993 containsMalformedEOL = true; 994 } 995 } 996 // hit a line end, reset our line length counter 997 span = 0; 998 } 999 else { 1000 span++; 1001 // the text has long lines, we can't transfer this as unencoded text. 1002 if (span > 998) { 1003 containsLongLines = true; 1004 } 1005 1006 // non-ascii character, we have to transfer this in binary. 1007 if (!ASCIIUtil.isAscii(ch)) { 1008 nonAsciiChars++; 1009 } 1010 else { 1011 asciiChars++; 1012 } 1013 } 1014 previousChar = ch; 1015 } 1016 1017 1018 public String getBinaryTransferEncoding() { 1019 if (nonAsciiChars != 0 || containsLongLines || containsMalformedEOL) { 1020 return "base64"; 1021 } 1022 else { 1023 return "7bit"; 1024 } 1025 } 1026 1027 public String getTextTransferEncoding() { 1028 // looking good so far, only valid chars here. 1029 if (nonAsciiChars == 0) { 1030 // does this contain long text lines? We need to use a Q-P encoding which will 1031 // be only slightly longer, but handles folding the longer lines. 1032 if (containsLongLines) { 1033 return "quoted-printable"; 1034 } 1035 else { 1036 // ideal! Easiest one to handle. 1037 return "7bit"; 1038 } 1039 } 1040 else { 1041 // mostly characters requiring encoding? Base64 is our best bet. 1042 if (nonAsciiChars > asciiChars) { 1043 return "base64"; 1044 } 1045 else { 1046 // Q-P encoding will use fewer bytes than the full Base64. 1047 return "quoted-printable"; 1048 } 1049 } 1050 } 1051 }