001 /**
002 *
003 * Copyright 2003-2004 The Apache Software Foundation
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package javax.mail.internet;
019
020 import java.io.BufferedInputStream;
021 import java.io.BufferedReader;
022 import java.io.ByteArrayInputStream;
023 import java.io.ByteArrayOutputStream;
024 import java.io.IOException;
025 import java.io.InputStream;
026 import java.io.InputStreamReader;
027 import java.io.OutputStream;
028 import java.io.UnsupportedEncodingException;
029 import java.util.HashMap;
030 import java.util.Map;
031 import java.util.NoSuchElementException;
032 import java.util.StringTokenizer;
033
034 import javax.activation.DataHandler;
035 import javax.activation.DataSource;
036 import javax.mail.MessagingException;
037
038 import org.apache.geronimo.mail.util.ASCIIUtil;
039 import org.apache.geronimo.mail.util.Base64;
040 import org.apache.geronimo.mail.util.Base64DecoderStream;
041 import org.apache.geronimo.mail.util.Base64Encoder;
042 import org.apache.geronimo.mail.util.Base64EncoderStream;
043 import org.apache.geronimo.mail.util.QuotedPrintableDecoderStream;
044 import org.apache.geronimo.mail.util.QuotedPrintableEncoderStream;
045 import org.apache.geronimo.mail.util.QuotedPrintableEncoder;
046 import org.apache.geronimo.mail.util.QuotedPrintable;
047 import org.apache.geronimo.mail.util.SessionUtil;
048 import org.apache.geronimo.mail.util.UUDecoderStream;
049 import org.apache.geronimo.mail.util.UUEncoderStream;
050
// Supported transfer encodings include "base64", "quoted-printable", "7bit", "8bit" and "binary".
// In addition, "uuencode" is also supported.
053
054 /**
055 * @version $Rev: 412426 $ $Date: 2006-06-07 08:21:46 -0700 (Wed, 07 Jun 2006) $
056 */
057 public class MimeUtility {
058
// name of the boolean property (default false) whose value encodeWord() hands to the word encoders.
private static final String MIME_FOLDENCODEDWORDS = "mail.mime.foldencodedwords";
// name of the boolean property (default true) that selects strict token parsing in decodeText().
private static final String MIME_DECODE_TEXT_STRICT = "mail.mime.decodetext.strict";

/**
 * Static utility class; instances are never created.
 */
private MimeUtility() {
}

// NOTE(review): not referenced inside this class -- presumably part of the public API contract; confirm.
public static final int ALL = -1;

// characters that quoteAndEscapeString() prefixes with a backslash.
private static final String escapedChars = "\"\\\r\n";
// characters treated as linear white space when header text is split into tokens.
private static final String linearWhiteSpace = " \t\r\n";

// "specials" set handed to the Q encoder when encoding a word (encodeWord)...
private static final String QP_WORD_SPECIALS = "=_?\"#$%&'(),.:;<>@[\\]^`{|}~";
// ...and when encoding unstructured text (encodeText).
private static final String QP_TEXT_SPECIALS = "=_?";

// the javamail spec includes the ability to map java encoding names to MIME-specified names. Normally,
// these values are loaded from a character mapping file. Keys are lower-cased names.
private static Map java2mime;
private static Map mime2java;

static {
    // we need to load the mapping tables used by javaCharset() and mimeCharset().
    loadCharacterSetMappings();
}
083
084 public static InputStream decode(InputStream in, String encoding) throws MessagingException {
085 encoding = encoding.toLowerCase();
086
087 // some encodies are just pass-throughs, with no real decoding.
088 if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
089 return in;
090 }
091 else if (encoding.equals("base64")) {
092 return new Base64DecoderStream(in);
093 }
094 // UUEncode is known by a couple historical extension names too.
095 else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
096 return new UUDecoderStream(in);
097 }
098 else if (encoding.equals("quoted-printable")) {
099 return new QuotedPrintableDecoderStream(in);
100 }
101 else {
102 throw new MessagingException("Unknown encoding " + encoding);
103 }
104 }
105
106 /**
107 * Decode a string of text obtained from a mail header into
108 * it's proper form. The text generally will consist of a
109 * string of tokens, some of which may be encoded using
110 * base64 encoding.
111 *
112 * @param text The text to decode.
113 *
114 * @return The decoded test string.
115 * @exception UnsupportedEncodingException
116 */
117 public static String decodeText(String text) throws UnsupportedEncodingException {
118 // if the text contains any encoded tokens, those tokens will be marked with "=?". If the
119 // source string doesn't contain that sequent, no decoding is required.
120 if (text.indexOf("=?") < 0) {
121 return text;
122 }
123
124 // we have two sets of rules we can apply.
125 if (!SessionUtil.getBooleanProperty(MIME_DECODE_TEXT_STRICT, true)) {
126 return decodeTextNonStrict(text);
127 }
128
129 int offset = 0;
130 int endOffset = text.length();
131
132 int startWhiteSpace = -1;
133 int endWhiteSpace = -1;
134
135 StringBuffer decodedText = new StringBuffer(text.length());
136
137 boolean previousTokenEncoded = false;
138
139 while (offset < endOffset) {
140 char ch = text.charAt(offset);
141
142 // is this a whitespace character?
143 if (linearWhiteSpace.indexOf(ch) != -1) {
144 startWhiteSpace = offset;
145 while (offset < endOffset) {
146 // step over the white space characters.
147 ch = text.charAt(offset);
148 if (linearWhiteSpace.indexOf(ch) != -1) {
149 offset++;
150 }
151 else {
152 // record the location of the first non lwsp and drop down to process the
153 // token characters.
154 endWhiteSpace = offset;
155 break;
156 }
157 }
158 }
159 else {
160 // we have a word token. We need to scan over the word and then try to parse it.
161 int wordStart = offset;
162
163 while (offset < endOffset) {
164 // step over the white space characters.
165 ch = text.charAt(offset);
166 if (linearWhiteSpace.indexOf(ch) == -1) {
167 offset++;
168 }
169 else {
170 break;
171 }
172
173 //NB: Trailing whitespace on these header strings will just be discarded.
174 }
175 // pull out the word token.
176 String word = text.substring(wordStart, offset);
177 // is the token encoded? decode the word
178 if (word.startsWith("=?")) {
179 try {
180 // if this gives a parsing failure, treat it like a non-encoded word.
181 String decodedWord = decodeWord(word);
182
183 // are any whitespace characters significant? Append 'em if we've got 'em.
184 if (!previousTokenEncoded) {
185 if (startWhiteSpace != -1) {
186 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
187 startWhiteSpace = -1;
188 }
189 }
190 // this is definitely a decoded token.
191 previousTokenEncoded = true;
192 // and add this to the text.
193 decodedText.append(decodedWord);
194 // we continue parsing from here...we allow parsing errors to fall through
195 // and get handled as normal text.
196 continue;
197
198 } catch (ParseException e) {
199 }
200 }
201 // this is a normal token, so it doesn't matter what the previous token was. Add the white space
202 // if we have it.
203 if (startWhiteSpace != -1) {
204 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
205 startWhiteSpace = -1;
206 }
207 // this is not a decoded token.
208 previousTokenEncoded = false;
209 decodedText.append(word);
210 }
211 }
212
213 return decodedText.toString();
214 }
215
216
217 /**
218 * Decode a string of text obtained from a mail header into
219 * it's proper form. The text generally will consist of a
220 * string of tokens, some of which may be encoded using
221 * base64 encoding. This is for non-strict decoded for mailers that
222 * violate the RFC 2047 restriction that decoded tokens must be delimited
223 * by linear white space. This will scan tokens looking for inner tokens
224 * enclosed in "=?" -- "?=" pairs.
225 *
226 * @param text The text to decode.
227 *
228 * @return The decoded test string.
229 * @exception UnsupportedEncodingException
230 */
231 private static String decodeTextNonStrict(String text) throws UnsupportedEncodingException {
232 int offset = 0;
233 int endOffset = text.length();
234
235 int startWhiteSpace = -1;
236 int endWhiteSpace = -1;
237
238 StringBuffer decodedText = new StringBuffer(text.length());
239
240 boolean previousTokenEncoded = false;
241
242 while (offset < endOffset) {
243 char ch = text.charAt(offset);
244
245 // is this a whitespace character?
246 if (linearWhiteSpace.indexOf(ch) != -1) {
247 startWhiteSpace = offset;
248 while (offset < endOffset) {
249 // step over the white space characters.
250 ch = text.charAt(offset);
251 if (linearWhiteSpace.indexOf(ch) != -1) {
252 offset++;
253 }
254 else {
255 // record the location of the first non lwsp and drop down to process the
256 // token characters.
257 endWhiteSpace = offset;
258 break;
259 }
260 }
261 }
262 else {
263 // we're at the start of a word token. We potentially need to break this up into subtokens
264 int wordStart = offset;
265
266 while (offset < endOffset) {
267 // step over the white space characters.
268 ch = text.charAt(offset);
269 if (linearWhiteSpace.indexOf(ch) == -1) {
270 offset++;
271 }
272 else {
273 break;
274 }
275
276 //NB: Trailing whitespace on these header strings will just be discarded.
277 }
278 // pull out the word token.
279 String word = text.substring(wordStart, offset);
280
281 int decodeStart = 0;
282
283 // now scan and process each of the bits within here.
284 while (decodeStart < word.length()) {
285 int tokenStart = word.indexOf("=?", decodeStart);
286 if (tokenStart == -1) {
287 // this is a normal token, so it doesn't matter what the previous token was. Add the white space
288 // if we have it.
289 if (startWhiteSpace != -1) {
290 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
291 startWhiteSpace = -1;
292 }
293 // this is not a decoded token.
294 previousTokenEncoded = false;
295 decodedText.append(word.substring(decodeStart));
296 // we're finished.
297 break;
298 }
299 // we have something to process
300 else {
301 // we might have a normal token preceeding this.
302 if (tokenStart != decodeStart) {
303 // this is a normal token, so it doesn't matter what the previous token was. Add the white space
304 // if we have it.
305 if (startWhiteSpace != -1) {
306 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
307 startWhiteSpace = -1;
308 }
309 // this is not a decoded token.
310 previousTokenEncoded = false;
311 decodedText.append(word.substring(decodeStart, tokenStart));
312 }
313
314 // now find the end marker.
315 int tokenEnd = word.indexOf("?=", tokenStart);
316 // sigh, an invalid token. Treat this as plain text.
317 if (tokenEnd == -1) {
318 // this is a normal token, so it doesn't matter what the previous token was. Add the white space
319 // if we have it.
320 if (startWhiteSpace != -1) {
321 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
322 startWhiteSpace = -1;
323 }
324 // this is not a decoded token.
325 previousTokenEncoded = false;
326 decodedText.append(word.substring(tokenStart));
327 // we're finished.
328 break;
329 }
330 else {
331 // update our ticker
332 decodeStart = tokenEnd + 2;
333
334 String token = word.substring(tokenStart, tokenEnd);
335 try {
336 // if this gives a parsing failure, treat it like a non-encoded word.
337 String decodedWord = decodeWord(token);
338
339 // are any whitespace characters significant? Append 'em if we've got 'em.
340 if (!previousTokenEncoded) {
341 if (startWhiteSpace != -1) {
342 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
343 startWhiteSpace = -1;
344 }
345 }
346 // this is definitely a decoded token.
347 previousTokenEncoded = true;
348 // and add this to the text.
349 decodedText.append(decodedWord);
350 // we continue parsing from here...we allow parsing errors to fall through
351 // and get handled as normal text.
352 continue;
353
354 } catch (ParseException e) {
355 }
356 // this is a normal token, so it doesn't matter what the previous token was. Add the white space
357 // if we have it.
358 if (startWhiteSpace != -1) {
359 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
360 startWhiteSpace = -1;
361 }
362 // this is not a decoded token.
363 previousTokenEncoded = false;
364 decodedText.append(token);
365 }
366 }
367 }
368 }
369 }
370
371 return decodedText.toString();
372 }
373
374 /**
375 * Parse a string using the RFC 2047 rules for an "encoded-word"
376 * type. This encoding has the syntax:
377 *
378 * encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
379 *
380 * @param word The possibly encoded word value.
381 *
382 * @return The decoded word.
383 * @exception ParseException
384 * @exception UnsupportedEncodingException
385 */
386 public static String decodeWord(String word) throws ParseException, UnsupportedEncodingException {
387 // encoded words start with the characters "=?". If this not an encoded word, we throw a
388 // ParseException for the caller.
389
390 if (!word.startsWith("=?")) {
391 throw new ParseException("Invalid RFC 2047 encoded-word: " + word);
392 }
393
394 int charsetPos = word.indexOf('?', 2);
395 if (charsetPos == -1) {
396 throw new ParseException("Missing charset in RFC 2047 encoded-word: " + word);
397 }
398
399 // pull out the character set information (this is the MIME name at this point).
400 String charset = word.substring(2, charsetPos).toLowerCase();
401
402 // now pull out the encoding token the same way.
403 int encodingPos = word.indexOf('?', charsetPos + 1);
404 if (encodingPos == -1) {
405 throw new ParseException("Missing encoding in RFC 2047 encoded-word: " + word);
406 }
407
408 String encoding = word.substring(charsetPos + 1, encodingPos);
409
410 // and finally the encoded text.
411 int encodedTextPos = word.indexOf("?=", encodingPos + 1);
412 if (encodedTextPos == -1) {
413 throw new ParseException("Missing encoded text in RFC 2047 encoded-word: " + word);
414 }
415
416 String encodedText = word.substring(encodingPos + 1, encodedTextPos);
417
418 // seems a bit silly to encode a null string, but easy to deal with.
419 if (encodedText.length() == 0) {
420 return "";
421 }
422
423 try {
424 // the decoder writes directly to an output stream.
425 ByteArrayOutputStream out = new ByteArrayOutputStream(encodedText.length());
426
427 byte[] encodedData = encodedText.getBytes("US-ASCII");
428
429 // Base64 encoded?
430 if (encoding.equals("B")) {
431 Base64.decode(encodedData, out);
432 }
433 // maybe quoted printable.
434 else if (encoding.equals("Q")) {
435 QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder();
436 dataEncoder.decodeWord(encodedData, out);
437 }
438 else {
439 throw new UnsupportedEncodingException("Unknown RFC 2047 encoding: " + encoding);
440 }
441 // get the decoded byte data and convert into a string.
442 byte[] decodedData = out.toByteArray();
443 return new String(decodedData, javaCharset(charset));
444 } catch (IOException e) {
445 throw new UnsupportedEncodingException("Invalid RFC 2047 encoding");
446 }
447
448 }
449
450 /**
451 * Wrap an encoder around a given output stream.
452 *
453 * @param out The output stream to wrap.
454 * @param encoding The name of the encoding.
455 *
456 * @return A instance of FilterOutputStream that manages on the fly
457 * encoding for the requested encoding type.
458 * @exception MessagingException
459 */
460 public static OutputStream encode(OutputStream out, String encoding) throws MessagingException {
461 // no encoding specified, so assume it goes out unchanged.
462 if (encoding == null) {
463 return out;
464 }
465
466 encoding = encoding.toLowerCase();
467
468 // some encodies are just pass-throughs, with no real decoding.
469 if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
470 return out;
471 }
472 else if (encoding.equals("base64")) {
473 return new Base64EncoderStream(out);
474 }
475 // UUEncode is known by a couple historical extension names too.
476 else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
477 return new UUEncoderStream(out);
478 }
479 else if (encoding.equals("quoted-printable")) {
480 return new QuotedPrintableEncoderStream(out);
481 }
482 else {
483 throw new MessagingException("Unknown encoding " + encoding);
484 }
485 }
486
487 /**
488 * Wrap an encoder around a given output stream.
489 *
490 * @param out The output stream to wrap.
491 * @param encoding The name of the encoding.
492 * @param filename The filename of the data being sent (only used for UUEncode).
493 *
494 * @return A instance of FilterOutputStream that manages on the fly
495 * encoding for the requested encoding type.
496 * @exception MessagingException
497 */
498 public static OutputStream encode(OutputStream out, String encoding, String filename) throws MessagingException {
499 encoding = encoding.toLowerCase();
500
501 // some encodies are just pass-throughs, with no real decoding.
502 if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
503 return out;
504 }
505 else if (encoding.equals("base64")) {
506 return new Base64EncoderStream(out);
507 }
508 // UUEncode is known by a couple historical extension names too.
509 else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
510 return new UUEncoderStream(out, filename);
511 }
512 else if (encoding.equals("quoted-printable")) {
513 return new QuotedPrintableEncoderStream(out);
514 }
515 else {
516 throw new MessagingException("Unknown encoding " + encoding);
517 }
518 }
519
520
521 public static String encodeText(String word) throws UnsupportedEncodingException {
522 return encodeText(word, null, null);
523 }
524
525 public static String encodeText(String word, String charset, String encoding) throws UnsupportedEncodingException {
526 return encodeWord(word, charset, encoding, false);
527 }
528
529 public static String encodeWord(String word) throws UnsupportedEncodingException {
530 return encodeWord(word, null, null);
531 }
532
533 public static String encodeWord(String word, String charset, String encoding) throws UnsupportedEncodingException {
534 return encodeWord(word, charset, encoding, true);
535 }
536
537
538 private static String encodeWord(String word, String charset, String encoding, boolean encodingWord) throws UnsupportedEncodingException {
539
540 // figure out what we need to encode this.
541 String encoder = ASCIIUtil.getTextTransferEncoding(word);
542 // all ascii? We can return this directly,
543 if (encoder.equals("7bit")) {
544 return word;
545 }
546
547 // if not given a charset, use the default.
548 if (charset == null) {
549 charset = getDefaultMIMECharset();
550 }
551
552 // sort out the encoder. If not explicitly given, use the best guess we've already established.
553 if (encoding != null) {
554 if (encoding.equalsIgnoreCase("B")) {
555 encoder = "base64";
556 }
557 else if (encoding.equalsIgnoreCase("Q")) {
558 encoder = "quoted-printable";
559 }
560 else {
561 throw new UnsupportedEncodingException("Unknown transfer encoding: " + encoding);
562 }
563 }
564
565 try {
566 // get the string bytes in the correct source charset
567 InputStream in = new ByteArrayInputStream(word.getBytes( javaCharset(charset)));
568 ByteArrayOutputStream out = new ByteArrayOutputStream();
569
570 if (encoder.equals("base64")) {
571 Base64Encoder dataEncoder = new Base64Encoder();
572 dataEncoder.encodeWord(in, charset, out, SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false));
573 }
574 else {
575 QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder();
576 dataEncoder.encodeWord(in, charset, encodingWord ? QP_WORD_SPECIALS : QP_TEXT_SPECIALS, out, SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false));
577 }
578
579 byte[] bytes = out.toByteArray();
580 return new String(bytes);
581 } catch (IOException e) {
582 throw new UnsupportedEncodingException("Invalid encoding");
583 }
584 }
585
586
587 /**
588 * Examine the content of a data source and decide what type
589 * of transfer encoding should be used. For text streams,
590 * we'll decided between 7bit, quoted-printable, and base64.
591 * For binary content types, we'll use either 7bit or base64.
592 *
593 * @param handler The DataHandler associated with the content.
594 *
595 * @return The string name of an encoding used to transfer the content.
596 */
597 public static String getEncoding(DataHandler handler) {
598
599
600 // if this handler has an associated data source, we can read directly from the
601 // data source to make this judgment. This is generally MUCH faster than asking the
602 // DataHandler to write out the data for us.
603 DataSource ds = handler.getDataSource();
604 if (ds != null) {
605 return getEncoding(ds);
606 }
607
608 try {
609 // get a parser that allows us to make comparisons.
610 ContentType content = new ContentType(ds.getContentType());
611
612 // The only access to the content bytes at this point is by asking the handler to write
613 // the information out to a stream. We're going to pipe this through a special stream
614 // that examines the bytes as they go by.
615 ContentCheckingOutputStream checker = new ContentCheckingOutputStream();
616
617 handler.writeTo(checker);
618
619 // figure this out based on whether we believe this to be a text type or not.
620 if (content.match("text/*")) {
621 return checker.getTextTransferEncoding();
622 }
623 else {
624 return checker.getBinaryTransferEncoding();
625 }
626
627 } catch (Exception e) {
628 // any unexpected I/O exceptions we'll force to a "safe" fallback position.
629 return "base64";
630 }
631 }
632
633
634 /**
635 * Determine the what transfer encoding should be used for
636 * data retrieved from a DataSource.
637 *
638 * @param source The DataSource for the transmitted data.
639 *
640 * @return The string name of the encoding form that should be used for
641 * the data.
642 */
643 public static String getEncoding(DataSource source) {
644 InputStream in = null;
645
646 try {
647 // get a parser that allows us to make comparisons.
648 ContentType content = new ContentType(source.getContentType());
649
650 // we're probably going to have to scan the data.
651 in = source.getInputStream();
652
653 if (!content.match("text/*")) {
654 // Not purporting to be a text type? Examine the content to see we might be able to
655 // at least pretend it is an ascii type.
656 return ASCIIUtil.getBinaryTransferEncoding(in);
657 }
658 else {
659 return ASCIIUtil.getTextTransferEncoding(in);
660 }
661 } catch (Exception e) {
662 // this was a problem...not sure what makes sense here, so we'll assume it's binary
663 // and we need to transfer this using Base64 encoding.
664 return "base64";
665 } finally {
666 // make sure we close the stream
667 try {
668 if (in != null) {
669 in.close();
670 }
671 } catch (IOException e) {
672 }
673 }
674 }
675
676
677 /**
678 * Quote a "word" value. If the word contains any character from
679 * the specified "specials" list, this value is returned as a
680 * quoted strong. Otherwise, it is returned unchanged (an "atom").
681 *
682 * @param word The word requiring quoting.
683 * @param specials The set of special characters that can't appear in an unquoted
684 * string.
685 *
686 * @return The quoted value. This will be unchanged if the word doesn't contain
687 * any of the designated special characters.
688 */
689 public static String quote(String word, String specials) {
690 int wordLength = word.length();
691 boolean requiresQuoting = false;
692 // scan the string looking for problem characters
693 for (int i =0; i < wordLength; i++) {
694 char ch = word.charAt(i);
695 // special escaped characters require escaping, which also implies quoting.
696 if (escapedChars.indexOf(ch) >= 0) {
697 return quoteAndEscapeString(word);
698 }
699 // now check for control characters or the designated special characters.
700 if (ch < 32 || ch >= 127 || specials.indexOf(ch) >= 0) {
701 // we know this requires quoting, but we still need to scan the entire string to
702 // see if contains chars that require escaping. Just go ahead and treat it as if it does.
703 return quoteAndEscapeString(word);
704 }
705 }
706 return word;
707 }
708
709 /**
710 * Take a string and return it as a formatted quoted string, with
711 * all characters requiring escaping handled properly.
712 *
713 * @param word The string to quote.
714 *
715 * @return The quoted string.
716 */
717 private static String quoteAndEscapeString(String word) {
718 int wordLength = word.length();
719 // allocate at least enough for the string and two quotes plus a reasonable number of escaped chars.
720 StringBuffer buffer = new StringBuffer(wordLength + 10);
721 // add the leading quote.
722 buffer.append('"');
723
724 for (int i = 0; i < wordLength; i++) {
725 char ch = word.charAt(i);
726 // is this an escaped char?
727 if (escapedChars.indexOf(ch) >= 0) {
728 // add the escape marker before appending.
729 buffer.append('\\');
730 }
731 buffer.append(ch);
732 }
733 // now the closing quote
734 buffer.append('"');
735 return buffer.toString();
736 }
737
738 /**
739 * Translate a MIME standard character set name into the Java
740 * equivalent.
741 *
742 * @param charset The MIME standard name.
743 *
744 * @return The Java equivalent for this name.
745 */
746 public static String javaCharset(String charset) {
747 // nothing in, nothing out.
748 if (charset == null) {
749 return null;
750 }
751
752 String mappedCharset = (String)mime2java.get(charset.toLowerCase());
753 // if there is no mapping, then the original name is used. Many of the MIME character set
754 // names map directly back into Java. The reverse isn't necessarily true.
755 return mappedCharset == null ? charset : mappedCharset;
756 }
757
758 /**
759 * Map a Java character set name into the MIME equivalent.
760 *
761 * @param charset The java character set name.
762 *
763 * @return The MIME standard equivalent for this character set name.
764 */
765 public static String mimeCharset(String charset) {
766 // nothing in, nothing out.
767 if (charset == null) {
768 return null;
769 }
770
771 String mappedCharset = (String)java2mime.get(charset.toLowerCase());
772 // if there is no mapping, then the original name is used. Many of the MIME character set
773 // names map directly back into Java. The reverse isn't necessarily true.
774 return mappedCharset == null ? charset : mappedCharset;
775 }
776
777
778 /**
779 * Get the default character set to use, in Java name format.
780 * This either be the value set with the mail.mime.charset
781 * system property or obtained from the file.encoding system
782 * property. If neither of these is set, we fall back to
783 * 8859_1 (basically US-ASCII).
784 *
785 * @return The character string value of the default character set.
786 */
787 public static String getDefaultJavaCharset() {
788 String charset = SessionUtil.getProperty("mail.mime.charset");
789 if (charset != null) {
790 return javaCharset(charset);
791 }
792 return SessionUtil.getProperty("file.encoding", "8859_1");
793 }
794
795 /**
796 * Get the default character set to use, in MIME name format.
797 * This either be the value set with the mail.mime.charset
798 * system property or obtained from the file.encoding system
799 * property. If neither of these is set, we fall back to
800 * 8859_1 (basically US-ASCII).
801 *
802 * @return The character string value of the default character set.
803 */
804 static String getDefaultMIMECharset() {
805 // if the property is specified, this can be used directly.
806 String charset = SessionUtil.getProperty("mail.mime.charset");
807 if (charset != null) {
808 return charset;
809 }
810
811 // get the Java-defined default and map back to a MIME name.
812 return mimeCharset(SessionUtil.getProperty("file.encoding", "8859_1"));
813 }
814
815
816 /**
817 * Load the default mapping tables used by the javaCharset()
818 * and mimeCharset() methods. By default, these tables are
819 * loaded from the /META-INF/javamail.charset.map file. If
820 * something goes wrong loading that file, we configure things
821 * with a default mapping table (which just happens to mimic
822 * what's in the default mapping file).
823 */
824 static private void loadCharacterSetMappings() {
825 java2mime = new HashMap();
826 mime2java = new HashMap();
827
828
829 // normally, these come from a character map file contained in the jar file.
830 try {
831 InputStream map = javax.mail.internet.MimeUtility.class.getResourceAsStream("/META-INF/javamail.charset.map");
832
833 if (map != null) {
834 // get a reader for this so we can load.
835 BufferedReader reader = new BufferedReader(new InputStreamReader(map));
836
837 readMappings(reader, java2mime);
838 readMappings(reader, mime2java);
839 }
840 } catch (Exception e) {
841 }
842
843 // if any sort of error occurred reading the preferred file version, we could end up with empty
844 // mapping tables. This could cause all sorts of difficulty, so ensure they are populated with at
845 // least a reasonable set of defaults.
846
847 // these mappings echo what's in the default file.
848 if (java2mime.isEmpty()) {
849 java2mime.put("8859_1", "ISO-8859-1");
850 java2mime.put("iso8859_1", "ISO-8859-1");
851 java2mime.put("iso8859-1", "ISO-8859-1");
852
853 java2mime.put("8859_2", "ISO-8859-2");
854 java2mime.put("iso8859_2", "ISO-8859-2");
855 java2mime.put("iso8859-2", "ISO-8859-2");
856
857 java2mime.put("8859_3", "ISO-8859-3");
858 java2mime.put("iso8859_3", "ISO-8859-3");
859 java2mime.put("iso8859-3", "ISO-8859-3");
860
861 java2mime.put("8859_4", "ISO-8859-4");
862 java2mime.put("iso8859_4", "ISO-8859-4");
863 java2mime.put("iso8859-4", "ISO-8859-4");
864
865 java2mime.put("8859_5", "ISO-8859-5");
866 java2mime.put("iso8859_5", "ISO-8859-5");
867 java2mime.put("iso8859-5", "ISO-8859-5");
868
869 java2mime.put ("8859_6", "ISO-8859-6");
870 java2mime.put("iso8859_6", "ISO-8859-6");
871 java2mime.put("iso8859-6", "ISO-8859-6");
872
873 java2mime.put("8859_7", "ISO-8859-7");
874 java2mime.put("iso8859_7", "ISO-8859-7");
875 java2mime.put("iso8859-7", "ISO-8859-7");
876
877 java2mime.put("8859_8", "ISO-8859-8");
878 java2mime.put("iso8859_8", "ISO-8859-8");
879 java2mime.put("iso8859-8", "ISO-8859-8");
880
881 java2mime.put("8859_9", "ISO-8859-9");
882 java2mime.put("iso8859_9", "ISO-8859-9");
883 java2mime.put("iso8859-9", "ISO-8859-9");
884
885 java2mime.put("sjis", "Shift_JIS");
886 java2mime.put ("jis", "ISO-2022-JP");
887 java2mime.put("iso2022jp", "ISO-2022-JP");
888 java2mime.put("euc_jp", "euc-jp");
889 java2mime.put("koi8_r", "koi8-r");
890 java2mime.put("euc_cn", "euc-cn");
891 java2mime.put("euc_tw", "euc-tw");
892 java2mime.put("euc_kr", "euc-kr");
893 }
894
895 if (mime2java.isEmpty ()) {
896 mime2java.put("iso-2022-cn", "ISO2022CN");
897 mime2java.put("iso-2022-kr", "ISO2022KR");
898 mime2java.put("utf-8", "UTF8");
899 mime2java.put("utf8", "UTF8");
900 mime2java.put("ja_jp.iso2022-7", "ISO2022JP");
901 mime2java.put("ja_jp.eucjp", "EUCJIS");
902 mime2java.put ("euc-kr", "KSC5601");
903 mime2java.put("euckr", "KSC5601");
904 mime2java.put("us-ascii", "ISO-8859-1");
905 mime2java.put("x-us-ascii", "ISO-8859-1");
906 }
907 }
908
909
910 /**
911 * Read a section of a character map table and populate the
912 * target mapping table with the information. The table end
913 * is marked by a line starting with "--" and also ending with
914 * "--". Blank lines and comment lines (beginning with '#') are
915 * ignored.
916 *
917 * @param reader The source of the file information.
918 * @param table The mapping table used to store the information.
919 */
920 static private void readMappings(BufferedReader reader, Map table) throws IOException {
921 // process lines to the EOF or the end of table marker.
922 while (true) {
923 String line = reader.readLine();
924 // no line returned is an EOF
925 if (line == null) {
926 return;
927 }
928
929 // trim so we're not messed up by trailing blanks
930 line = line.trim();
931
932 if (line.length() == 0 || line.startsWith("#")) {
933 continue;
934 }
935
936 // stop processing if this is the end-of-table marker.
937 if (line.startsWith("--") && line.endsWith("--")) {
938 return;
939 }
940
941 // we allow either blanks or tabs as token delimiters.
942 StringTokenizer tokenizer = new StringTokenizer(line, " \t");
943
944 try {
945 String from = tokenizer.nextToken().toLowerCase();
946 String to = tokenizer.nextToken();
947
948 table.put(from, to);
949 } catch (NoSuchElementException e) {
950 // just ignore the line if invalid.
951 }
952 }
953 }
954
955
956 }
957
958
959 /**
960 * Utility class for examining content information written out
961 * by a DataHandler object. This stream gathers statistics on
962 * the stream so it can make transfer encoding determinations.
963 */
964 class ContentCheckingOutputStream extends OutputStream {
965 private int asciiChars = 0;
966 private int nonAsciiChars = 0;
967 private boolean containsLongLines = false;
968 private boolean containsMalformedEOL = false;
969 private int previousChar = 0;
970 private int span = 0;
971
972 ContentCheckingOutputStream() {
973 }
974
975 public void write(byte[] data) throws IOException {
976 write(data, 0, data.length);
977 }
978
979 public void write(byte[] data, int offset, int length) throws IOException {
980 for (int i = 0; i < length; i++) {
981 write(data[offset + i]);
982 }
983 }
984
985 public void write(int ch) {
986 // we found a linebreak. Reset the line length counters on either one. We don't
987 // really need to validate here.
988 if (ch == '\n' || ch == '\r') {
989 // we found a newline, this is only valid if the previous char was the '\r'
990 if (ch == '\n') {
991 // malformed linebreak? force this to base64 encoding.
992 if (previousChar != '\r') {
993 containsMalformedEOL = true;
994 }
995 }
996 // hit a line end, reset our line length counter
997 span = 0;
998 }
999 else {
1000 span++;
1001 // the text has long lines, we can't transfer this as unencoded text.
1002 if (span > 998) {
1003 containsLongLines = true;
1004 }
1005
1006 // non-ascii character, we have to transfer this in binary.
1007 if (!ASCIIUtil.isAscii(ch)) {
1008 nonAsciiChars++;
1009 }
1010 else {
1011 asciiChars++;
1012 }
1013 }
1014 previousChar = ch;
1015 }
1016
1017
1018 public String getBinaryTransferEncoding() {
1019 if (nonAsciiChars != 0 || containsLongLines || containsMalformedEOL) {
1020 return "base64";
1021 }
1022 else {
1023 return "7bit";
1024 }
1025 }
1026
1027 public String getTextTransferEncoding() {
1028 // looking good so far, only valid chars here.
1029 if (nonAsciiChars == 0) {
1030 // does this contain long text lines? We need to use a Q-P encoding which will
1031 // be only slightly longer, but handles folding the longer lines.
1032 if (containsLongLines) {
1033 return "quoted-printable";
1034 }
1035 else {
1036 // ideal! Easiest one to handle.
1037 return "7bit";
1038 }
1039 }
1040 else {
1041 // mostly characters requiring encoding? Base64 is our best bet.
1042 if (nonAsciiChars > asciiChars) {
1043 return "base64";
1044 }
1045 else {
1046 // Q-P encoding will use fewer bytes than the full Base64.
1047 return "quoted-printable";
1048 }
1049 }
1050 }
1051 }