View Javadoc

1   /**
2    *
3    * Copyright 2003-2004 The Apache Software Foundation
4    *
5    *  Licensed under the Apache License, Version 2.0 (the "License");
6    *  you may not use this file except in compliance with the License.
7    *  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   *  Unless required by applicable law or agreed to in writing, software
12   *  distributed under the License is distributed on an "AS IS" BASIS,
13   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   *  See the License for the specific language governing permissions and
15   *  limitations under the License.
16   */
17  
18  package javax.mail.internet;
19  
20  import java.io.BufferedInputStream;
21  import java.io.BufferedReader;
22  import java.io.ByteArrayInputStream;
23  import java.io.ByteArrayOutputStream;
24  import java.io.IOException;
25  import java.io.InputStream;
26  import java.io.InputStreamReader;
27  import java.io.OutputStream;
28  import java.io.UnsupportedEncodingException;
29  import java.util.HashMap;
30  import java.util.Map;
31  import java.util.NoSuchElementException;
32  import java.util.StringTokenizer;
33  
34  import javax.activation.DataHandler;
35  import javax.activation.DataSource;
36  import javax.mail.MessagingException;
37  
38  import org.apache.geronimo.mail.util.ASCIIUtil;
39  import org.apache.geronimo.mail.util.Base64;
40  import org.apache.geronimo.mail.util.Base64DecoderStream;
41  import org.apache.geronimo.mail.util.Base64Encoder;
42  import org.apache.geronimo.mail.util.Base64EncoderStream;
43  import org.apache.geronimo.mail.util.QuotedPrintableDecoderStream;
44  import org.apache.geronimo.mail.util.QuotedPrintableEncoderStream;
45  import org.apache.geronimo.mail.util.QuotedPrintableEncoder;
46  import org.apache.geronimo.mail.util.QuotedPrintable;
47  import org.apache.geronimo.mail.util.SessionUtil;
48  import org.apache.geronimo.mail.util.UUDecoderStream;
49  import org.apache.geronimo.mail.util.UUEncoderStream;
50  
51  // Supported transfer encodings include "base64", "quoted-printable", "7bit", "8bit" and "binary".
52  // In addition, "uuencode" is also supported.
53  
54  /**
55   * @version $Rev: 412426 $ $Date: 2006-06-07 08:21:46 -0700 (Wed, 07 Jun 2006) $
56   */
57  public class MimeUtility {
58  
59      private static final String MIME_FOLDENCODEDWORDS = "mail.mime.foldencodedwords";
60      private static final String MIME_DECODE_TEXT_STRICT = "mail.mime.decodetext.strict";
61  
62      private MimeUtility() {
63      }
64  
65      public static final int ALL = -1;
66  
67      private static String defaultJavaCharset;
68      private static String escapedChars = "\"\\\r\n";
69      private static String linearWhiteSpace = " \t\r\n";
70  
71      private static String QP_WORD_SPECIALS = "=_?\"#$%&'(),.:;<>@[\\]^`{|}~";
72      private static String QP_TEXT_SPECIALS = "=_?";
73  
74      // the javamail spec includes the ability to map java encoding names to MIME-specified names.  Normally,
75      // these values are loaded from a character mapping file.
76      private static Map java2mime;
77      private static Map mime2java;
78  
79      static {
80          // we need to load the mapping tables used by javaCharset() and mimeCharset().
81          loadCharacterSetMappings();
82      }
83  
84      public static InputStream decode(InputStream in, String encoding) throws MessagingException {
85          encoding = encoding.toLowerCase();
86  
87          // some encodies are just pass-throughs, with no real decoding.
88          if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
89              return in;
90          }
91          else if (encoding.equals("base64")) {
92              return new Base64DecoderStream(in);
93          }
94          // UUEncode is known by a couple historical extension names too.
95          else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
96              return new UUDecoderStream(in);
97          }
98          else if (encoding.equals("quoted-printable")) {
99              return new QuotedPrintableDecoderStream(in);
100         }
101         else {
102             throw new MessagingException("Unknown encoding " + encoding);
103         }
104     }
105 
106     /**
107      * Decode a string of text obtained from a mail header into
108      * it's proper form.  The text generally will consist of a
109      * string of tokens, some of which may be encoded using
110      * base64 encoding.
111      *
112      * @param text   The text to decode.
113      *
114      * @return The decoded test string.
115      * @exception UnsupportedEncodingException
116      */
117     public static String decodeText(String text) throws UnsupportedEncodingException {
118         // if the text contains any encoded tokens, those tokens will be marked with "=?".  If the
119         // source string doesn't contain that sequent, no decoding is required.
120         if (text.indexOf("=?") < 0) {
121             return text;
122         }
123 
124         // we have two sets of rules we can apply.
125         if (!SessionUtil.getBooleanProperty(MIME_DECODE_TEXT_STRICT, true)) {
126             return decodeTextNonStrict(text);
127         }
128 
129         int offset = 0;
130         int endOffset = text.length();
131 
132         int startWhiteSpace = -1;
133         int endWhiteSpace = -1;
134 
135         StringBuffer decodedText = new StringBuffer(text.length());
136 
137         boolean previousTokenEncoded = false;
138 
139         while (offset < endOffset) {
140             char ch = text.charAt(offset);
141 
142             // is this a whitespace character?
143             if (linearWhiteSpace.indexOf(ch) != -1) {
144                 startWhiteSpace = offset;
145                 while (offset < endOffset) {
146                     // step over the white space characters.
147                     ch = text.charAt(offset);
148                     if (linearWhiteSpace.indexOf(ch) != -1) {
149                         offset++;
150                     }
151                     else {
152                         // record the location of the first non lwsp and drop down to process the
153                         // token characters.
154                         endWhiteSpace = offset;
155                         break;
156                     }
157                 }
158             }
159             else {
160                 // we have a word token.  We need to scan over the word and then try to parse it.
161                 int wordStart = offset;
162 
163                 while (offset < endOffset) {
164                     // step over the white space characters.
165                     ch = text.charAt(offset);
166                     if (linearWhiteSpace.indexOf(ch) == -1) {
167                         offset++;
168                     }
169                     else {
170                         break;
171                     }
172 
173                     //NB:  Trailing whitespace on these header strings will just be discarded.
174                 }
175                 // pull out the word token.
176                 String word = text.substring(wordStart, offset);
177                 // is the token encoded?  decode the word
178                 if (word.startsWith("=?")) {
179                     try {
180                         // if this gives a parsing failure, treat it like a non-encoded word.
181                         String decodedWord = decodeWord(word);
182 
183                         // are any whitespace characters significant?  Append 'em if we've got 'em.
184                         if (!previousTokenEncoded) {
185                             if (startWhiteSpace != -1) {
186                                 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
187                                 startWhiteSpace = -1;
188                             }
189                         }
190                         // this is definitely a decoded token.
191                         previousTokenEncoded = true;
192                         // and add this to the text.
193                         decodedText.append(decodedWord);
194                         // we continue parsing from here...we allow parsing errors to fall through
195                         // and get handled as normal text.
196                         continue;
197 
198                     } catch (ParseException e) {
199                     }
200                 }
201                 // this is a normal token, so it doesn't matter what the previous token was.  Add the white space
202                 // if we have it.
203                 if (startWhiteSpace != -1) {
204                     decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
205                     startWhiteSpace = -1;
206                 }
207                 // this is not a decoded token.
208                 previousTokenEncoded = false;
209                 decodedText.append(word);
210             }
211         }
212 
213         return decodedText.toString();
214     }
215 
216 
217     /**
218      * Decode a string of text obtained from a mail header into
219      * it's proper form.  The text generally will consist of a
220      * string of tokens, some of which may be encoded using
221      * base64 encoding.  This is for non-strict decoded for mailers that
222      * violate the RFC 2047 restriction that decoded tokens must be delimited
223      * by linear white space.  This will scan tokens looking for inner tokens
224      * enclosed in "=?" -- "?=" pairs.
225      *
226      * @param text   The text to decode.
227      *
228      * @return The decoded test string.
229      * @exception UnsupportedEncodingException
230      */
231     private static String decodeTextNonStrict(String text) throws UnsupportedEncodingException {
232         int offset = 0;
233         int endOffset = text.length();
234 
235         int startWhiteSpace = -1;
236         int endWhiteSpace = -1;
237 
238         StringBuffer decodedText = new StringBuffer(text.length());
239 
240         boolean previousTokenEncoded = false;
241 
242         while (offset < endOffset) {
243             char ch = text.charAt(offset);
244 
245             // is this a whitespace character?
246             if (linearWhiteSpace.indexOf(ch) != -1) {
247                 startWhiteSpace = offset;
248                 while (offset < endOffset) {
249                     // step over the white space characters.
250                     ch = text.charAt(offset);
251                     if (linearWhiteSpace.indexOf(ch) != -1) {
252                         offset++;
253                     }
254                     else {
255                         // record the location of the first non lwsp and drop down to process the
256                         // token characters.
257                         endWhiteSpace = offset;
258                         break;
259                     }
260                 }
261             }
262             else {
263                 // we're at the start of a word token.  We potentially need to break this up into subtokens
264                 int wordStart = offset;
265 
266                 while (offset < endOffset) {
267                     // step over the white space characters.
268                     ch = text.charAt(offset);
269                     if (linearWhiteSpace.indexOf(ch) == -1) {
270                         offset++;
271                     }
272                     else {
273                         break;
274                     }
275 
276                     //NB:  Trailing whitespace on these header strings will just be discarded.
277                 }
278                 // pull out the word token.
279                 String word = text.substring(wordStart, offset);
280 
281                 int decodeStart = 0;
282 
283                 // now scan and process each of the bits within here.
284                 while (decodeStart < word.length()) {
285                     int tokenStart = word.indexOf("=?", decodeStart);
286                     if (tokenStart == -1) {
287                         // this is a normal token, so it doesn't matter what the previous token was.  Add the white space
288                         // if we have it.
289                         if (startWhiteSpace != -1) {
290                             decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
291                             startWhiteSpace = -1;
292                         }
293                         // this is not a decoded token.
294                         previousTokenEncoded = false;
295                         decodedText.append(word.substring(decodeStart));
296                         // we're finished.
297                         break;
298                     }
299                     // we have something to process
300                     else {
301                         // we might have a normal token preceeding this.
302                         if (tokenStart != decodeStart) {
303                             // this is a normal token, so it doesn't matter what the previous token was.  Add the white space
304                             // if we have it.
305                             if (startWhiteSpace != -1) {
306                                 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
307                                 startWhiteSpace = -1;
308                             }
309                             // this is not a decoded token.
310                             previousTokenEncoded = false;
311                             decodedText.append(word.substring(decodeStart, tokenStart));
312                         }
313 
314                         // now find the end marker.
315                         int tokenEnd = word.indexOf("?=", tokenStart);
316                         // sigh, an invalid token.  Treat this as plain text.
317                         if (tokenEnd == -1) {
318                             // this is a normal token, so it doesn't matter what the previous token was.  Add the white space
319                             // if we have it.
320                             if (startWhiteSpace != -1) {
321                                 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
322                                 startWhiteSpace = -1;
323                             }
324                             // this is not a decoded token.
325                             previousTokenEncoded = false;
326                             decodedText.append(word.substring(tokenStart));
327                             // we're finished.
328                             break;
329                         }
330                         else {
331                             // update our ticker
332                             decodeStart = tokenEnd + 2;
333 
334                             String token = word.substring(tokenStart, tokenEnd);
335                             try {
336                                 // if this gives a parsing failure, treat it like a non-encoded word.
337                                 String decodedWord = decodeWord(token);
338 
339                                 // are any whitespace characters significant?  Append 'em if we've got 'em.
340                                 if (!previousTokenEncoded) {
341                                     if (startWhiteSpace != -1) {
342                                         decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
343                                         startWhiteSpace = -1;
344                                     }
345                                 }
346                                 // this is definitely a decoded token.
347                                 previousTokenEncoded = true;
348                                 // and add this to the text.
349                                 decodedText.append(decodedWord);
350                                 // we continue parsing from here...we allow parsing errors to fall through
351                                 // and get handled as normal text.
352                                 continue;
353 
354                             } catch (ParseException e) {
355                             }
356                             // this is a normal token, so it doesn't matter what the previous token was.  Add the white space
357                             // if we have it.
358                             if (startWhiteSpace != -1) {
359                                 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
360                                 startWhiteSpace = -1;
361                             }
362                             // this is not a decoded token.
363                             previousTokenEncoded = false;
364                             decodedText.append(token);
365                         }
366                     }
367                 }
368             }
369         }
370 
371         return decodedText.toString();
372     }
373 
374     /**
375      * Parse a string using the RFC 2047 rules for an "encoded-word"
376      * type.  This encoding has the syntax:
377      *
378      * encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
379      *
380      * @param word   The possibly encoded word value.
381      *
382      * @return The decoded word.
383      * @exception ParseException
384      * @exception UnsupportedEncodingException
385      */
386     public static String decodeWord(String word) throws ParseException, UnsupportedEncodingException {
387         // encoded words start with the characters "=?".  If this not an encoded word, we throw a
388         // ParseException for the caller.
389 
390         if (!word.startsWith("=?")) {
391             throw new ParseException("Invalid RFC 2047 encoded-word: " + word);
392         }
393 
394         int charsetPos = word.indexOf('?', 2);
395         if (charsetPos == -1) {
396             throw new ParseException("Missing charset in RFC 2047 encoded-word: " + word);
397         }
398 
399         // pull out the character set information (this is the MIME name at this point).
400         String charset = word.substring(2, charsetPos).toLowerCase();
401 
402         // now pull out the encoding token the same way.
403         int encodingPos = word.indexOf('?', charsetPos + 1);
404         if (encodingPos == -1) {
405             throw new ParseException("Missing encoding in RFC 2047 encoded-word: " + word);
406         }
407 
408         String encoding = word.substring(charsetPos + 1, encodingPos);
409 
410         // and finally the encoded text.
411         int encodedTextPos = word.indexOf("?=", encodingPos + 1);
412         if (encodedTextPos == -1) {
413             throw new ParseException("Missing encoded text in RFC 2047 encoded-word: " + word);
414         }
415 
416         String encodedText = word.substring(encodingPos + 1, encodedTextPos);
417 
418         // seems a bit silly to encode a null string, but easy to deal with.
419         if (encodedText.length() == 0) {
420             return "";
421         }
422 
423         try {
424             // the decoder writes directly to an output stream.
425             ByteArrayOutputStream out = new ByteArrayOutputStream(encodedText.length());
426 
427             byte[] encodedData = encodedText.getBytes("US-ASCII");
428 
429             // Base64 encoded?
430             if (encoding.equals("B")) {
431                 Base64.decode(encodedData, out);
432             }
433             // maybe quoted printable.
434             else if (encoding.equals("Q")) {
435                 QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder();
436                 dataEncoder.decodeWord(encodedData, out);
437             }
438             else {
439                 throw new UnsupportedEncodingException("Unknown RFC 2047 encoding: " + encoding);
440             }
441             // get the decoded byte data and convert into a string.
442             byte[] decodedData = out.toByteArray();
443             return new String(decodedData, javaCharset(charset));
444         } catch (IOException e) {
445             throw new UnsupportedEncodingException("Invalid RFC 2047 encoding");
446         }
447 
448     }
449 
450     /**
451      * Wrap an encoder around a given output stream.
452      *
453      * @param out      The output stream to wrap.
454      * @param encoding The name of the encoding.
455      *
456      * @return A instance of FilterOutputStream that manages on the fly
457      *         encoding for the requested encoding type.
458      * @exception MessagingException
459      */
460     public static OutputStream encode(OutputStream out, String encoding) throws MessagingException {
461         // no encoding specified, so assume it goes out unchanged.
462         if (encoding == null) {
463             return out;
464         }
465 
466         encoding = encoding.toLowerCase();
467 
468         // some encodies are just pass-throughs, with no real decoding.
469         if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
470             return out;
471         }
472         else if (encoding.equals("base64")) {
473             return new Base64EncoderStream(out);
474         }
475         // UUEncode is known by a couple historical extension names too.
476         else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
477             return new UUEncoderStream(out);
478         }
479         else if (encoding.equals("quoted-printable")) {
480             return new QuotedPrintableEncoderStream(out);
481         }
482         else {
483             throw new MessagingException("Unknown encoding " + encoding);
484         }
485     }
486 
487     /**
488      * Wrap an encoder around a given output stream.
489      *
490      * @param out      The output stream to wrap.
491      * @param encoding The name of the encoding.
492      * @param filename The filename of the data being sent (only used for UUEncode).
493      *
494      * @return A instance of FilterOutputStream that manages on the fly
495      *         encoding for the requested encoding type.
496      * @exception MessagingException
497      */
498     public static OutputStream encode(OutputStream out, String encoding, String filename) throws MessagingException {
499         encoding = encoding.toLowerCase();
500 
501         // some encodies are just pass-throughs, with no real decoding.
502         if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
503             return out;
504         }
505         else if (encoding.equals("base64")) {
506             return new Base64EncoderStream(out);
507         }
508         // UUEncode is known by a couple historical extension names too.
509         else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
510             return new UUEncoderStream(out, filename);
511         }
512         else if (encoding.equals("quoted-printable")) {
513              return new QuotedPrintableEncoderStream(out);
514         }
515         else {
516             throw new MessagingException("Unknown encoding " + encoding);
517         }
518     }
519 
520 
521     public static String encodeText(String word) throws UnsupportedEncodingException {
522         return encodeText(word, null, null);
523     }
524 
525     public static String encodeText(String word, String charset, String encoding) throws UnsupportedEncodingException {
526         return encodeWord(word, charset, encoding, false);
527     }
528 
529     public static String encodeWord(String word) throws UnsupportedEncodingException {
530         return encodeWord(word, null, null);
531     }
532 
533     public static String encodeWord(String word, String charset, String encoding) throws UnsupportedEncodingException {
534         return encodeWord(word, charset, encoding, true);
535     }
536 
537 
538     private static String encodeWord(String word, String charset, String encoding, boolean encodingWord) throws UnsupportedEncodingException {
539 
540         // figure out what we need to encode this.
541         String encoder = ASCIIUtil.getTextTransferEncoding(word);
542         // all ascii?  We can return this directly,
543         if (encoder.equals("7bit")) {
544             return word;
545         }
546 
547         // if not given a charset, use the default.
548         if (charset == null) {
549             charset = getDefaultMIMECharset();
550         }
551 
552         // sort out the encoder.  If not explicitly given, use the best guess we've already established.
553         if (encoding != null) {
554             if (encoding.equalsIgnoreCase("B")) {
555                 encoder = "base64";
556             }
557             else if (encoding.equalsIgnoreCase("Q")) {
558                 encoder = "quoted-printable";
559             }
560             else {
561                 throw new UnsupportedEncodingException("Unknown transfer encoding: " + encoding);
562             }
563         }
564 
565         try {
566             // get the string bytes in the correct source charset
567             InputStream in = new ByteArrayInputStream(word.getBytes( javaCharset(charset)));
568             ByteArrayOutputStream out = new ByteArrayOutputStream();
569 
570             if (encoder.equals("base64")) {
571                 Base64Encoder dataEncoder = new Base64Encoder();
572                 dataEncoder.encodeWord(in, charset, out, SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false));
573             }
574             else {
575                 QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder();
576                 dataEncoder.encodeWord(in, charset, encodingWord ? QP_WORD_SPECIALS : QP_TEXT_SPECIALS, out, SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false));
577             }
578 
579             byte[] bytes = out.toByteArray();
580             return new String(bytes);
581         } catch (IOException e) {
582             throw new UnsupportedEncodingException("Invalid encoding");
583         }
584     }
585 
586 
587     /**
588      * Examine the content of a data source and decide what type
589      * of transfer encoding should be used.  For text streams,
590      * we'll decided between 7bit, quoted-printable, and base64.
591      * For binary content types, we'll use either 7bit or base64.
592      *
593      * @param handler The DataHandler associated with the content.
594      *
595      * @return The string name of an encoding used to transfer the content.
596      */
597     public static String getEncoding(DataHandler handler) {
598 
599 
600         // if this handler has an associated data source, we can read directly from the
601         // data source to make this judgment.  This is generally MUCH faster than asking the
602         // DataHandler to write out the data for us.
603         DataSource ds = handler.getDataSource();
604         if (ds != null) {
605             return getEncoding(ds);
606         }
607 
608         try {
609             // get a parser that allows us to make comparisons.
610             ContentType content = new ContentType(ds.getContentType());
611 
612             // The only access to the content bytes at this point is by asking the handler to write
613             // the information out to a stream.  We're going to pipe this through a special stream
614             // that examines the bytes as they go by.
615             ContentCheckingOutputStream checker = new ContentCheckingOutputStream();
616 
617             handler.writeTo(checker);
618 
619             // figure this out based on whether we believe this to be a text type or not.
620             if (content.match("text/*")) {
621                 return checker.getTextTransferEncoding();
622             }
623             else {
624                 return checker.getBinaryTransferEncoding();
625             }
626 
627         } catch (Exception e) {
628             // any unexpected I/O exceptions we'll force to a "safe" fallback position.
629             return "base64";
630         }
631     }
632 
633 
634     /**
635      * Determine the what transfer encoding should be used for
636      * data retrieved from a DataSource.
637      *
638      * @param source The DataSource for the transmitted data.
639      *
640      * @return The string name of the encoding form that should be used for
641      *         the data.
642      */
643     public static String getEncoding(DataSource source) {
644         InputStream in = null;
645 
646         try {
647             // get a parser that allows us to make comparisons.
648             ContentType content = new ContentType(source.getContentType());
649 
650             // we're probably going to have to scan the data.
651             in = source.getInputStream();
652 
653             if (!content.match("text/*")) {
654                 // Not purporting to be a text type?  Examine the content to see we might be able to
655                 // at least pretend it is an ascii type.
656                 return ASCIIUtil.getBinaryTransferEncoding(in);
657             }
658             else {
659                 return ASCIIUtil.getTextTransferEncoding(in);
660             }
661         } catch (Exception e) {
662             // this was a problem...not sure what makes sense here, so we'll assume it's binary
663             // and we need to transfer this using Base64 encoding.
664             return "base64";
665         } finally {
666             // make sure we close the stream
667             try {
668                 if (in != null) {
669                     in.close();
670                 }
671             } catch (IOException e) {
672             }
673         }
674     }
675 
676 
677     /**
678      * Quote a "word" value.  If the word contains any character from
679      * the specified "specials" list, this value is returned as a
680      * quoted strong.  Otherwise, it is returned unchanged (an "atom").
681      *
682      * @param word     The word requiring quoting.
683      * @param specials The set of special characters that can't appear in an unquoted
684      *                 string.
685      *
686      * @return The quoted value.  This will be unchanged if the word doesn't contain
687      *         any of the designated special characters.
688      */
689     public static String quote(String word, String specials) {
690         int wordLength = word.length();
691         boolean requiresQuoting = false;
692         // scan the string looking for problem characters
693         for (int i =0; i < wordLength; i++) {
694             char ch = word.charAt(i);
695             // special escaped characters require escaping, which also implies quoting.
696             if (escapedChars.indexOf(ch) >= 0) {
697                 return quoteAndEscapeString(word);
698             }
699             // now check for control characters or the designated special characters.
700             if (ch < 32 || ch >= 127 || specials.indexOf(ch) >= 0) {
701                 // we know this requires quoting, but we still need to scan the entire string to
702                 // see if contains chars that require escaping.  Just go ahead and treat it as if it does.
703                 return quoteAndEscapeString(word);
704             }
705         }
706         return word;
707     }
708 
709     /**
710      * Take a string and return it as a formatted quoted string, with
711      * all characters requiring escaping handled properly.
712      *
713      * @param word   The string to quote.
714      *
715      * @return The quoted string.
716      */
717     private static String quoteAndEscapeString(String word) {
718         int wordLength = word.length();
719         // allocate at least enough for the string and two quotes plus a reasonable number of escaped chars.
720         StringBuffer buffer = new StringBuffer(wordLength + 10);
721         // add the leading quote.
722         buffer.append('"');
723 
724         for (int i = 0; i < wordLength; i++) {
725             char ch = word.charAt(i);
726             // is this an escaped char?
727             if (escapedChars.indexOf(ch) >= 0) {
728                 // add the escape marker before appending.
729                 buffer.append('\\');
730             }
731             buffer.append(ch);
732         }
733         // now the closing quote
734         buffer.append('"');
735         return buffer.toString();
736     }
737 
738     /**
739      * Translate a MIME standard character set name into the Java
740      * equivalent.
741      *
742      * @param charset The MIME standard name.
743      *
744      * @return The Java equivalent for this name.
745      */
746     public static String javaCharset(String charset) {
747         // nothing in, nothing out.
748         if (charset == null) {
749             return null;
750         }
751 
752         String mappedCharset = (String)mime2java.get(charset.toLowerCase());
753         // if there is no mapping, then the original name is used.  Many of the MIME character set
754         // names map directly back into Java.  The reverse isn't necessarily true.
755         return mappedCharset == null ? charset : mappedCharset;
756     }
757 
758     /**
759      * Map a Java character set name into the MIME equivalent.
760      *
761      * @param charset The java character set name.
762      *
763      * @return The MIME standard equivalent for this character set name.
764      */
765     public static String mimeCharset(String charset) {
766         // nothing in, nothing out.
767         if (charset == null) {
768             return null;
769         }
770 
771         String mappedCharset = (String)java2mime.get(charset.toLowerCase());
772         // if there is no mapping, then the original name is used.  Many of the MIME character set
773         // names map directly back into Java.  The reverse isn't necessarily true.
774         return mappedCharset == null ? charset : mappedCharset;
775     }
776 
777 
778     /**
779      * Get the default character set to use, in Java name format.
780      * This either be the value set with the mail.mime.charset
781      * system property or obtained from the file.encoding system
782      * property.  If neither of these is set, we fall back to
783      * 8859_1 (basically US-ASCII).
784      *
785      * @return The character string value of the default character set.
786      */
787     public static String getDefaultJavaCharset() {
788         String charset = SessionUtil.getProperty("mail.mime.charset");
789         if (charset != null) {
790             return javaCharset(charset);
791         }
792         return SessionUtil.getProperty("file.encoding", "8859_1");
793     }
794 
795     /**
796      * Get the default character set to use, in MIME name format.
797      * This either be the value set with the mail.mime.charset
798      * system property or obtained from the file.encoding system
799      * property.  If neither of these is set, we fall back to
800      * 8859_1 (basically US-ASCII).
801      *
802      * @return The character string value of the default character set.
803      */
804     static String getDefaultMIMECharset() {
805         // if the property is specified, this can be used directly.
806         String charset = SessionUtil.getProperty("mail.mime.charset");
807         if (charset != null) {
808             return charset;
809         }
810 
811         // get the Java-defined default and map back to a MIME name.
812         return mimeCharset(SessionUtil.getProperty("file.encoding", "8859_1"));
813     }
814 
815 
816     /**
817      * Load the default mapping tables used by the javaCharset()
818      * and mimeCharset() methods.  By default, these tables are
819      * loaded from the /META-INF/javamail.charset.map file.  If
820      * something goes wrong loading that file, we configure things
821      * with a default mapping table (which just happens to mimic
822      * what's in the default mapping file).
823      */
824     static private void loadCharacterSetMappings() {
825         java2mime = new HashMap();
826         mime2java = new HashMap();
827 
828 
829         // normally, these come from a character map file contained in the jar file.
830         try {
831             InputStream map = javax.mail.internet.MimeUtility.class.getResourceAsStream("/META-INF/javamail.charset.map");
832 
833             if (map != null) {
834                 // get a reader for this so we can load.
835                 BufferedReader reader = new BufferedReader(new InputStreamReader(map));
836 
837                 readMappings(reader, java2mime);
838                 readMappings(reader, mime2java);
839             }
840         } catch (Exception e) {
841         }
842 
843         // if any sort of error occurred reading the preferred file version, we could end up with empty
844         // mapping tables.  This could cause all sorts of difficulty, so ensure they are populated with at
845         // least a reasonable set of defaults.
846 
847         // these mappings echo what's in the default file.
848         if (java2mime.isEmpty()) {
849             java2mime.put("8859_1", "ISO-8859-1");
850             java2mime.put("iso8859_1", "ISO-8859-1");
851             java2mime.put("iso8859-1", "ISO-8859-1");
852 
853             java2mime.put("8859_2", "ISO-8859-2");
854             java2mime.put("iso8859_2", "ISO-8859-2");
855             java2mime.put("iso8859-2", "ISO-8859-2");
856 
857             java2mime.put("8859_3", "ISO-8859-3");
858             java2mime.put("iso8859_3", "ISO-8859-3");
859             java2mime.put("iso8859-3", "ISO-8859-3");
860 
861             java2mime.put("8859_4", "ISO-8859-4");
862             java2mime.put("iso8859_4", "ISO-8859-4");
863             java2mime.put("iso8859-4", "ISO-8859-4");
864 
865             java2mime.put("8859_5", "ISO-8859-5");
866             java2mime.put("iso8859_5", "ISO-8859-5");
867             java2mime.put("iso8859-5", "ISO-8859-5");
868 
869             java2mime.put ("8859_6", "ISO-8859-6");
870             java2mime.put("iso8859_6", "ISO-8859-6");
871             java2mime.put("iso8859-6", "ISO-8859-6");
872 
873             java2mime.put("8859_7", "ISO-8859-7");
874             java2mime.put("iso8859_7", "ISO-8859-7");
875             java2mime.put("iso8859-7", "ISO-8859-7");
876 
877             java2mime.put("8859_8", "ISO-8859-8");
878             java2mime.put("iso8859_8", "ISO-8859-8");
879             java2mime.put("iso8859-8", "ISO-8859-8");
880 
881             java2mime.put("8859_9", "ISO-8859-9");
882             java2mime.put("iso8859_9", "ISO-8859-9");
883             java2mime.put("iso8859-9", "ISO-8859-9");
884 
885             java2mime.put("sjis", "Shift_JIS");
886             java2mime.put ("jis", "ISO-2022-JP");
887             java2mime.put("iso2022jp", "ISO-2022-JP");
888             java2mime.put("euc_jp", "euc-jp");
889             java2mime.put("koi8_r", "koi8-r");
890             java2mime.put("euc_cn", "euc-cn");
891             java2mime.put("euc_tw", "euc-tw");
892             java2mime.put("euc_kr", "euc-kr");
893         }
894 
895         if (mime2java.isEmpty ()) {
896             mime2java.put("iso-2022-cn", "ISO2022CN");
897             mime2java.put("iso-2022-kr", "ISO2022KR");
898             mime2java.put("utf-8", "UTF8");
899             mime2java.put("utf8", "UTF8");
900             mime2java.put("ja_jp.iso2022-7", "ISO2022JP");
901             mime2java.put("ja_jp.eucjp", "EUCJIS");
902             mime2java.put ("euc-kr", "KSC5601");
903             mime2java.put("euckr", "KSC5601");
904             mime2java.put("us-ascii", "ISO-8859-1");
905             mime2java.put("x-us-ascii", "ISO-8859-1");
906         }
907     }
908 
909 
910     /**
911      * Read a section of a character map table and populate the
912      * target mapping table with the information.  The table end
913      * is marked by a line starting with "--" and also ending with
914      * "--".  Blank lines and comment lines (beginning with '#') are
915      * ignored.
916      *
917      * @param reader The source of the file information.
918      * @param table  The mapping table used to store the information.
919      */
920     static private void readMappings(BufferedReader reader, Map table) throws IOException {
921         // process lines to the EOF or the end of table marker.
922         while (true) {
923             String line = reader.readLine();
924             // no line returned is an EOF
925             if (line == null) {
926                 return;
927             }
928 
929             // trim so we're not messed up by trailing blanks
930             line = line.trim();
931 
932             if (line.length() == 0 || line.startsWith("#")) {
933                 continue;
934             }
935 
936             // stop processing if this is the end-of-table marker.
937             if (line.startsWith("--") && line.endsWith("--")) {
938                 return;
939             }
940 
941             // we allow either blanks or tabs as token delimiters.
942             StringTokenizer tokenizer = new StringTokenizer(line, " \t");
943 
944             try {
945                 String from = tokenizer.nextToken().toLowerCase();
946                 String to = tokenizer.nextToken();
947 
948                 table.put(from, to);
949             } catch (NoSuchElementException e) {
950                 // just ignore the line if invalid.
951             }
952         }
953     }
954 
955 
956 }
957 
958 
959 /**
960  * Utility class for examining content information written out
961  * by a DataHandler object.  This stream gathers statistics on
962  * the stream so it can make transfer encoding determinations.
963  */
964 class ContentCheckingOutputStream extends OutputStream {
965     private int asciiChars = 0;
966     private int nonAsciiChars = 0;
967     private boolean containsLongLines = false;
968     private boolean containsMalformedEOL = false;
969     private int previousChar = 0;
970     private int span = 0;
971 
972     ContentCheckingOutputStream() {
973     }
974 
975     public void write(byte[] data) throws IOException {
976         write(data, 0, data.length);
977     }
978 
979     public void write(byte[] data, int offset, int length) throws IOException {
980         for (int i = 0; i < length; i++) {
981             write(data[offset + i]);
982         }
983     }
984 
985     public void write(int ch) {
986         // we found a linebreak.  Reset the line length counters on either one.  We don't
987         // really need to validate here.
988         if (ch == '\n' || ch == '\r') {
989             // we found a newline, this is only valid if the previous char was the '\r'
990             if (ch == '\n') {
991                 // malformed linebreak?  force this to base64 encoding.
992                 if (previousChar != '\r') {
993                     containsMalformedEOL = true;
994                 }
995             }
996             // hit a line end, reset our line length counter
997             span = 0;
998         }
999         else {
1000             span++;
1001             // the text has long lines, we can't transfer this as unencoded text.
1002             if (span > 998) {
1003                 containsLongLines = true;
1004             }
1005 
1006             // non-ascii character, we have to transfer this in binary.
1007             if (!ASCIIUtil.isAscii(ch)) {
1008                 nonAsciiChars++;
1009             }
1010             else {
1011                 asciiChars++;
1012             }
1013         }
1014         previousChar = ch;
1015     }
1016 
1017 
1018     public String getBinaryTransferEncoding() {
1019         if (nonAsciiChars != 0 || containsLongLines || containsMalformedEOL) {
1020             return "base64";
1021         }
1022         else {
1023             return "7bit";
1024         }
1025     }
1026 
1027     public String getTextTransferEncoding() {
1028         // looking good so far, only valid chars here.
1029         if (nonAsciiChars == 0) {
1030             // does this contain long text lines?  We need to use a Q-P encoding which will
1031             // be only slightly longer, but handles folding the longer lines.
1032             if (containsLongLines) {
1033                 return "quoted-printable";
1034             }
1035             else {
1036                 // ideal!  Easiest one to handle.
1037                 return "7bit";
1038             }
1039         }
1040         else {
1041             // mostly characters requiring encoding?  Base64 is our best bet.
1042             if (nonAsciiChars > asciiChars) {
1043                 return "base64";
1044             }
1045             else {
1046                 // Q-P encoding will use fewer bytes than the full Base64.
1047                 return "quoted-printable";
1048             }
1049         }
1050     }
1051 }