001    /**
002     *
003     * Copyright 2003-2004 The Apache Software Foundation
004     *
005     *  Licensed under the Apache License, Version 2.0 (the "License");
006     *  you may not use this file except in compliance with the License.
007     *  You may obtain a copy of the License at
008     *
009     *     http://www.apache.org/licenses/LICENSE-2.0
010     *
011     *  Unless required by applicable law or agreed to in writing, software
012     *  distributed under the License is distributed on an "AS IS" BASIS,
013     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     *  See the License for the specific language governing permissions and
015     *  limitations under the License.
016     */
017    
018    package javax.mail.internet;
019    
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.StringTokenizer;

import javax.activation.DataHandler;
import javax.activation.DataSource;
import javax.mail.MessagingException;

import org.apache.geronimo.mail.util.ASCIIUtil;
import org.apache.geronimo.mail.util.Base64;
import org.apache.geronimo.mail.util.Base64DecoderStream;
import org.apache.geronimo.mail.util.Base64Encoder;
import org.apache.geronimo.mail.util.Base64EncoderStream;
import org.apache.geronimo.mail.util.QuotedPrintable;
import org.apache.geronimo.mail.util.QuotedPrintableDecoderStream;
import org.apache.geronimo.mail.util.QuotedPrintableEncoder;
import org.apache.geronimo.mail.util.QuotedPrintableEncoderStream;
import org.apache.geronimo.mail.util.SessionUtil;
import org.apache.geronimo.mail.util.UUDecoderStream;
import org.apache.geronimo.mail.util.UUEncoderStream;
050    
// Supported transfer encodings include "base64", "quoted-printable", "7bit", "8bit" and "binary".
// In addition, "uuencode" is also supported.
053    
054    /**
055     * @version $Rev: 412426 $ $Date: 2006-06-07 08:21:46 -0700 (Wed, 07 Jun 2006) $
056     */
057    public class MimeUtility {
058    
    // property read by the private encodeWord() method; its boolean value is handed to the
    // Base64 / quoted-printable encoders as the "fold encoded words" option (default false).
    private static final String MIME_FOLDENCODEDWORDS = "mail.mime.foldencodedwords";
    // property read by decodeText(); when it evaluates to false the non-strict decoder
    // (decodeTextNonStrict) is used instead of the strict one (default true).
    private static final String MIME_DECODE_TEXT_STRICT = "mail.mime.decodetext.strict";

    // private constructor: the class cannot be instantiated from outside.
    private MimeUtility() {
    }

    // NOTE(review): not referenced in the visible part of this file -- confirm its meaning
    // against the JavaMail API before relying on it.
    public static final int ALL = -1;

    // NOTE(review): neither assigned nor read in the visible part of this file; presumably a
    // cache for the default Java charset name -- confirm against the rest of the class.
    private static String defaultJavaCharset;
    // characters that make quote() return the word quoted and escaped.
    private static String escapedChars = "\"\\\r\n";
    // characters treated as linear white space when decodeText() splits a header into tokens.
    private static String linearWhiteSpace = " \t\r\n";

    // "specials" list handed to the quoted-printable encoder when encodeWord() encodes a word.
    private static String QP_WORD_SPECIALS = "=_?\"#$%&'(),.:;<>@[\\]^`{|}~";
    // "specials" list handed to the quoted-printable encoder when encodeText() encodes text.
    private static String QP_TEXT_SPECIALS = "=_?";

    // the javamail spec includes the ability to map java encoding names to MIME-specified names.  Normally,
    // these values are loaded from a character mapping file.
    private static Map java2mime;
    private static Map mime2java;

    static {
        // we need to load the mapping tables used by javaCharset() and mimeCharset().
        loadCharacterSetMappings();
    }
083    
084        public static InputStream decode(InputStream in, String encoding) throws MessagingException {
085            encoding = encoding.toLowerCase();
086    
087            // some encodies are just pass-throughs, with no real decoding.
088            if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
089                return in;
090            }
091            else if (encoding.equals("base64")) {
092                return new Base64DecoderStream(in);
093            }
094            // UUEncode is known by a couple historical extension names too.
095            else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
096                return new UUDecoderStream(in);
097            }
098            else if (encoding.equals("quoted-printable")) {
099                return new QuotedPrintableDecoderStream(in);
100            }
101            else {
102                throw new MessagingException("Unknown encoding " + encoding);
103            }
104        }
105    
106        /**
107         * Decode a string of text obtained from a mail header into
108         * it's proper form.  The text generally will consist of a
109         * string of tokens, some of which may be encoded using
110         * base64 encoding.
111         *
112         * @param text   The text to decode.
113         *
114         * @return The decoded test string.
115         * @exception UnsupportedEncodingException
116         */
117        public static String decodeText(String text) throws UnsupportedEncodingException {
118            // if the text contains any encoded tokens, those tokens will be marked with "=?".  If the
119            // source string doesn't contain that sequent, no decoding is required.
120            if (text.indexOf("=?") < 0) {
121                return text;
122            }
123    
124            // we have two sets of rules we can apply.
125            if (!SessionUtil.getBooleanProperty(MIME_DECODE_TEXT_STRICT, true)) {
126                return decodeTextNonStrict(text);
127            }
128    
129            int offset = 0;
130            int endOffset = text.length();
131    
132            int startWhiteSpace = -1;
133            int endWhiteSpace = -1;
134    
135            StringBuffer decodedText = new StringBuffer(text.length());
136    
137            boolean previousTokenEncoded = false;
138    
139            while (offset < endOffset) {
140                char ch = text.charAt(offset);
141    
142                // is this a whitespace character?
143                if (linearWhiteSpace.indexOf(ch) != -1) {
144                    startWhiteSpace = offset;
145                    while (offset < endOffset) {
146                        // step over the white space characters.
147                        ch = text.charAt(offset);
148                        if (linearWhiteSpace.indexOf(ch) != -1) {
149                            offset++;
150                        }
151                        else {
152                            // record the location of the first non lwsp and drop down to process the
153                            // token characters.
154                            endWhiteSpace = offset;
155                            break;
156                        }
157                    }
158                }
159                else {
160                    // we have a word token.  We need to scan over the word and then try to parse it.
161                    int wordStart = offset;
162    
163                    while (offset < endOffset) {
164                        // step over the white space characters.
165                        ch = text.charAt(offset);
166                        if (linearWhiteSpace.indexOf(ch) == -1) {
167                            offset++;
168                        }
169                        else {
170                            break;
171                        }
172    
173                        //NB:  Trailing whitespace on these header strings will just be discarded.
174                    }
175                    // pull out the word token.
176                    String word = text.substring(wordStart, offset);
177                    // is the token encoded?  decode the word
178                    if (word.startsWith("=?")) {
179                        try {
180                            // if this gives a parsing failure, treat it like a non-encoded word.
181                            String decodedWord = decodeWord(word);
182    
183                            // are any whitespace characters significant?  Append 'em if we've got 'em.
184                            if (!previousTokenEncoded) {
185                                if (startWhiteSpace != -1) {
186                                    decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
187                                    startWhiteSpace = -1;
188                                }
189                            }
190                            // this is definitely a decoded token.
191                            previousTokenEncoded = true;
192                            // and add this to the text.
193                            decodedText.append(decodedWord);
194                            // we continue parsing from here...we allow parsing errors to fall through
195                            // and get handled as normal text.
196                            continue;
197    
198                        } catch (ParseException e) {
199                        }
200                    }
201                    // this is a normal token, so it doesn't matter what the previous token was.  Add the white space
202                    // if we have it.
203                    if (startWhiteSpace != -1) {
204                        decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
205                        startWhiteSpace = -1;
206                    }
207                    // this is not a decoded token.
208                    previousTokenEncoded = false;
209                    decodedText.append(word);
210                }
211            }
212    
213            return decodedText.toString();
214        }
215    
216    
217        /**
218         * Decode a string of text obtained from a mail header into
219         * it's proper form.  The text generally will consist of a
220         * string of tokens, some of which may be encoded using
221         * base64 encoding.  This is for non-strict decoded for mailers that
222         * violate the RFC 2047 restriction that decoded tokens must be delimited
223         * by linear white space.  This will scan tokens looking for inner tokens
224         * enclosed in "=?" -- "?=" pairs.
225         *
226         * @param text   The text to decode.
227         *
228         * @return The decoded test string.
229         * @exception UnsupportedEncodingException
230         */
231        private static String decodeTextNonStrict(String text) throws UnsupportedEncodingException {
232            int offset = 0;
233            int endOffset = text.length();
234    
235            int startWhiteSpace = -1;
236            int endWhiteSpace = -1;
237    
238            StringBuffer decodedText = new StringBuffer(text.length());
239    
240            boolean previousTokenEncoded = false;
241    
242            while (offset < endOffset) {
243                char ch = text.charAt(offset);
244    
245                // is this a whitespace character?
246                if (linearWhiteSpace.indexOf(ch) != -1) {
247                    startWhiteSpace = offset;
248                    while (offset < endOffset) {
249                        // step over the white space characters.
250                        ch = text.charAt(offset);
251                        if (linearWhiteSpace.indexOf(ch) != -1) {
252                            offset++;
253                        }
254                        else {
255                            // record the location of the first non lwsp and drop down to process the
256                            // token characters.
257                            endWhiteSpace = offset;
258                            break;
259                        }
260                    }
261                }
262                else {
263                    // we're at the start of a word token.  We potentially need to break this up into subtokens
264                    int wordStart = offset;
265    
266                    while (offset < endOffset) {
267                        // step over the white space characters.
268                        ch = text.charAt(offset);
269                        if (linearWhiteSpace.indexOf(ch) == -1) {
270                            offset++;
271                        }
272                        else {
273                            break;
274                        }
275    
276                        //NB:  Trailing whitespace on these header strings will just be discarded.
277                    }
278                    // pull out the word token.
279                    String word = text.substring(wordStart, offset);
280    
281                    int decodeStart = 0;
282    
283                    // now scan and process each of the bits within here.
284                    while (decodeStart < word.length()) {
285                        int tokenStart = word.indexOf("=?", decodeStart);
286                        if (tokenStart == -1) {
287                            // this is a normal token, so it doesn't matter what the previous token was.  Add the white space
288                            // if we have it.
289                            if (startWhiteSpace != -1) {
290                                decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
291                                startWhiteSpace = -1;
292                            }
293                            // this is not a decoded token.
294                            previousTokenEncoded = false;
295                            decodedText.append(word.substring(decodeStart));
296                            // we're finished.
297                            break;
298                        }
299                        // we have something to process
300                        else {
301                            // we might have a normal token preceeding this.
302                            if (tokenStart != decodeStart) {
303                                // this is a normal token, so it doesn't matter what the previous token was.  Add the white space
304                                // if we have it.
305                                if (startWhiteSpace != -1) {
306                                    decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
307                                    startWhiteSpace = -1;
308                                }
309                                // this is not a decoded token.
310                                previousTokenEncoded = false;
311                                decodedText.append(word.substring(decodeStart, tokenStart));
312                            }
313    
314                            // now find the end marker.
315                            int tokenEnd = word.indexOf("?=", tokenStart);
316                            // sigh, an invalid token.  Treat this as plain text.
317                            if (tokenEnd == -1) {
318                                // this is a normal token, so it doesn't matter what the previous token was.  Add the white space
319                                // if we have it.
320                                if (startWhiteSpace != -1) {
321                                    decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
322                                    startWhiteSpace = -1;
323                                }
324                                // this is not a decoded token.
325                                previousTokenEncoded = false;
326                                decodedText.append(word.substring(tokenStart));
327                                // we're finished.
328                                break;
329                            }
330                            else {
331                                // update our ticker
332                                decodeStart = tokenEnd + 2;
333    
334                                String token = word.substring(tokenStart, tokenEnd);
335                                try {
336                                    // if this gives a parsing failure, treat it like a non-encoded word.
337                                    String decodedWord = decodeWord(token);
338    
339                                    // are any whitespace characters significant?  Append 'em if we've got 'em.
340                                    if (!previousTokenEncoded) {
341                                        if (startWhiteSpace != -1) {
342                                            decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
343                                            startWhiteSpace = -1;
344                                        }
345                                    }
346                                    // this is definitely a decoded token.
347                                    previousTokenEncoded = true;
348                                    // and add this to the text.
349                                    decodedText.append(decodedWord);
350                                    // we continue parsing from here...we allow parsing errors to fall through
351                                    // and get handled as normal text.
352                                    continue;
353    
354                                } catch (ParseException e) {
355                                }
356                                // this is a normal token, so it doesn't matter what the previous token was.  Add the white space
357                                // if we have it.
358                                if (startWhiteSpace != -1) {
359                                    decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
360                                    startWhiteSpace = -1;
361                                }
362                                // this is not a decoded token.
363                                previousTokenEncoded = false;
364                                decodedText.append(token);
365                            }
366                        }
367                    }
368                }
369            }
370    
371            return decodedText.toString();
372        }
373    
374        /**
375         * Parse a string using the RFC 2047 rules for an "encoded-word"
376         * type.  This encoding has the syntax:
377         *
378         * encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
379         *
380         * @param word   The possibly encoded word value.
381         *
382         * @return The decoded word.
383         * @exception ParseException
384         * @exception UnsupportedEncodingException
385         */
386        public static String decodeWord(String word) throws ParseException, UnsupportedEncodingException {
387            // encoded words start with the characters "=?".  If this not an encoded word, we throw a
388            // ParseException for the caller.
389    
390            if (!word.startsWith("=?")) {
391                throw new ParseException("Invalid RFC 2047 encoded-word: " + word);
392            }
393    
394            int charsetPos = word.indexOf('?', 2);
395            if (charsetPos == -1) {
396                throw new ParseException("Missing charset in RFC 2047 encoded-word: " + word);
397            }
398    
399            // pull out the character set information (this is the MIME name at this point).
400            String charset = word.substring(2, charsetPos).toLowerCase();
401    
402            // now pull out the encoding token the same way.
403            int encodingPos = word.indexOf('?', charsetPos + 1);
404            if (encodingPos == -1) {
405                throw new ParseException("Missing encoding in RFC 2047 encoded-word: " + word);
406            }
407    
408            String encoding = word.substring(charsetPos + 1, encodingPos);
409    
410            // and finally the encoded text.
411            int encodedTextPos = word.indexOf("?=", encodingPos + 1);
412            if (encodedTextPos == -1) {
413                throw new ParseException("Missing encoded text in RFC 2047 encoded-word: " + word);
414            }
415    
416            String encodedText = word.substring(encodingPos + 1, encodedTextPos);
417    
418            // seems a bit silly to encode a null string, but easy to deal with.
419            if (encodedText.length() == 0) {
420                return "";
421            }
422    
423            try {
424                // the decoder writes directly to an output stream.
425                ByteArrayOutputStream out = new ByteArrayOutputStream(encodedText.length());
426    
427                byte[] encodedData = encodedText.getBytes("US-ASCII");
428    
429                // Base64 encoded?
430                if (encoding.equals("B")) {
431                    Base64.decode(encodedData, out);
432                }
433                // maybe quoted printable.
434                else if (encoding.equals("Q")) {
435                    QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder();
436                    dataEncoder.decodeWord(encodedData, out);
437                }
438                else {
439                    throw new UnsupportedEncodingException("Unknown RFC 2047 encoding: " + encoding);
440                }
441                // get the decoded byte data and convert into a string.
442                byte[] decodedData = out.toByteArray();
443                return new String(decodedData, javaCharset(charset));
444            } catch (IOException e) {
445                throw new UnsupportedEncodingException("Invalid RFC 2047 encoding");
446            }
447    
448        }
449    
450        /**
451         * Wrap an encoder around a given output stream.
452         *
453         * @param out      The output stream to wrap.
454         * @param encoding The name of the encoding.
455         *
456         * @return A instance of FilterOutputStream that manages on the fly
457         *         encoding for the requested encoding type.
458         * @exception MessagingException
459         */
460        public static OutputStream encode(OutputStream out, String encoding) throws MessagingException {
461            // no encoding specified, so assume it goes out unchanged.
462            if (encoding == null) {
463                return out;
464            }
465    
466            encoding = encoding.toLowerCase();
467    
468            // some encodies are just pass-throughs, with no real decoding.
469            if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
470                return out;
471            }
472            else if (encoding.equals("base64")) {
473                return new Base64EncoderStream(out);
474            }
475            // UUEncode is known by a couple historical extension names too.
476            else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
477                return new UUEncoderStream(out);
478            }
479            else if (encoding.equals("quoted-printable")) {
480                return new QuotedPrintableEncoderStream(out);
481            }
482            else {
483                throw new MessagingException("Unknown encoding " + encoding);
484            }
485        }
486    
487        /**
488         * Wrap an encoder around a given output stream.
489         *
490         * @param out      The output stream to wrap.
491         * @param encoding The name of the encoding.
492         * @param filename The filename of the data being sent (only used for UUEncode).
493         *
494         * @return A instance of FilterOutputStream that manages on the fly
495         *         encoding for the requested encoding type.
496         * @exception MessagingException
497         */
498        public static OutputStream encode(OutputStream out, String encoding, String filename) throws MessagingException {
499            encoding = encoding.toLowerCase();
500    
501            // some encodies are just pass-throughs, with no real decoding.
502            if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
503                return out;
504            }
505            else if (encoding.equals("base64")) {
506                return new Base64EncoderStream(out);
507            }
508            // UUEncode is known by a couple historical extension names too.
509            else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
510                return new UUEncoderStream(out, filename);
511            }
512            else if (encoding.equals("quoted-printable")) {
513                 return new QuotedPrintableEncoderStream(out);
514            }
515            else {
516                throw new MessagingException("Unknown encoding " + encoding);
517            }
518        }
519    
520    
521        public static String encodeText(String word) throws UnsupportedEncodingException {
522            return encodeText(word, null, null);
523        }
524    
525        public static String encodeText(String word, String charset, String encoding) throws UnsupportedEncodingException {
526            return encodeWord(word, charset, encoding, false);
527        }
528    
529        public static String encodeWord(String word) throws UnsupportedEncodingException {
530            return encodeWord(word, null, null);
531        }
532    
533        public static String encodeWord(String word, String charset, String encoding) throws UnsupportedEncodingException {
534            return encodeWord(word, charset, encoding, true);
535        }
536    
537    
538        private static String encodeWord(String word, String charset, String encoding, boolean encodingWord) throws UnsupportedEncodingException {
539    
540            // figure out what we need to encode this.
541            String encoder = ASCIIUtil.getTextTransferEncoding(word);
542            // all ascii?  We can return this directly,
543            if (encoder.equals("7bit")) {
544                return word;
545            }
546    
547            // if not given a charset, use the default.
548            if (charset == null) {
549                charset = getDefaultMIMECharset();
550            }
551    
552            // sort out the encoder.  If not explicitly given, use the best guess we've already established.
553            if (encoding != null) {
554                if (encoding.equalsIgnoreCase("B")) {
555                    encoder = "base64";
556                }
557                else if (encoding.equalsIgnoreCase("Q")) {
558                    encoder = "quoted-printable";
559                }
560                else {
561                    throw new UnsupportedEncodingException("Unknown transfer encoding: " + encoding);
562                }
563            }
564    
565            try {
566                // get the string bytes in the correct source charset
567                InputStream in = new ByteArrayInputStream(word.getBytes( javaCharset(charset)));
568                ByteArrayOutputStream out = new ByteArrayOutputStream();
569    
570                if (encoder.equals("base64")) {
571                    Base64Encoder dataEncoder = new Base64Encoder();
572                    dataEncoder.encodeWord(in, charset, out, SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false));
573                }
574                else {
575                    QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder();
576                    dataEncoder.encodeWord(in, charset, encodingWord ? QP_WORD_SPECIALS : QP_TEXT_SPECIALS, out, SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false));
577                }
578    
579                byte[] bytes = out.toByteArray();
580                return new String(bytes);
581            } catch (IOException e) {
582                throw new UnsupportedEncodingException("Invalid encoding");
583            }
584        }
585    
586    
587        /**
588         * Examine the content of a data source and decide what type
589         * of transfer encoding should be used.  For text streams,
590         * we'll decided between 7bit, quoted-printable, and base64.
591         * For binary content types, we'll use either 7bit or base64.
592         *
593         * @param handler The DataHandler associated with the content.
594         *
595         * @return The string name of an encoding used to transfer the content.
596         */
597        public static String getEncoding(DataHandler handler) {
598    
599    
600            // if this handler has an associated data source, we can read directly from the
601            // data source to make this judgment.  This is generally MUCH faster than asking the
602            // DataHandler to write out the data for us.
603            DataSource ds = handler.getDataSource();
604            if (ds != null) {
605                return getEncoding(ds);
606            }
607    
608            try {
609                // get a parser that allows us to make comparisons.
610                ContentType content = new ContentType(ds.getContentType());
611    
612                // The only access to the content bytes at this point is by asking the handler to write
613                // the information out to a stream.  We're going to pipe this through a special stream
614                // that examines the bytes as they go by.
615                ContentCheckingOutputStream checker = new ContentCheckingOutputStream();
616    
617                handler.writeTo(checker);
618    
619                // figure this out based on whether we believe this to be a text type or not.
620                if (content.match("text/*")) {
621                    return checker.getTextTransferEncoding();
622                }
623                else {
624                    return checker.getBinaryTransferEncoding();
625                }
626    
627            } catch (Exception e) {
628                // any unexpected I/O exceptions we'll force to a "safe" fallback position.
629                return "base64";
630            }
631        }
632    
633    
634        /**
635         * Determine the what transfer encoding should be used for
636         * data retrieved from a DataSource.
637         *
638         * @param source The DataSource for the transmitted data.
639         *
640         * @return The string name of the encoding form that should be used for
641         *         the data.
642         */
643        public static String getEncoding(DataSource source) {
644            InputStream in = null;
645    
646            try {
647                // get a parser that allows us to make comparisons.
648                ContentType content = new ContentType(source.getContentType());
649    
650                // we're probably going to have to scan the data.
651                in = source.getInputStream();
652    
653                if (!content.match("text/*")) {
654                    // Not purporting to be a text type?  Examine the content to see we might be able to
655                    // at least pretend it is an ascii type.
656                    return ASCIIUtil.getBinaryTransferEncoding(in);
657                }
658                else {
659                    return ASCIIUtil.getTextTransferEncoding(in);
660                }
661            } catch (Exception e) {
662                // this was a problem...not sure what makes sense here, so we'll assume it's binary
663                // and we need to transfer this using Base64 encoding.
664                return "base64";
665            } finally {
666                // make sure we close the stream
667                try {
668                    if (in != null) {
669                        in.close();
670                    }
671                } catch (IOException e) {
672                }
673            }
674        }
675    
676    
677        /**
678         * Quote a "word" value.  If the word contains any character from
679         * the specified "specials" list, this value is returned as a
680         * quoted strong.  Otherwise, it is returned unchanged (an "atom").
681         *
682         * @param word     The word requiring quoting.
683         * @param specials The set of special characters that can't appear in an unquoted
684         *                 string.
685         *
686         * @return The quoted value.  This will be unchanged if the word doesn't contain
687         *         any of the designated special characters.
688         */
689        public static String quote(String word, String specials) {
690            int wordLength = word.length();
691            boolean requiresQuoting = false;
692            // scan the string looking for problem characters
693            for (int i =0; i < wordLength; i++) {
694                char ch = word.charAt(i);
695                // special escaped characters require escaping, which also implies quoting.
696                if (escapedChars.indexOf(ch) >= 0) {
697                    return quoteAndEscapeString(word);
698                }
699                // now check for control characters or the designated special characters.
700                if (ch < 32 || ch >= 127 || specials.indexOf(ch) >= 0) {
701                    // we know this requires quoting, but we still need to scan the entire string to
702                    // see if contains chars that require escaping.  Just go ahead and treat it as if it does.
703                    return quoteAndEscapeString(word);
704                }
705            }
706            return word;
707        }
708    
709        /**
710         * Take a string and return it as a formatted quoted string, with
711         * all characters requiring escaping handled properly.
712         *
713         * @param word   The string to quote.
714         *
715         * @return The quoted string.
716         */
717        private static String quoteAndEscapeString(String word) {
718            int wordLength = word.length();
719            // allocate at least enough for the string and two quotes plus a reasonable number of escaped chars.
720            StringBuffer buffer = new StringBuffer(wordLength + 10);
721            // add the leading quote.
722            buffer.append('"');
723    
724            for (int i = 0; i < wordLength; i++) {
725                char ch = word.charAt(i);
726                // is this an escaped char?
727                if (escapedChars.indexOf(ch) >= 0) {
728                    // add the escape marker before appending.
729                    buffer.append('\\');
730                }
731                buffer.append(ch);
732            }
733            // now the closing quote
734            buffer.append('"');
735            return buffer.toString();
736        }
737    
738        /**
739         * Translate a MIME standard character set name into the Java
740         * equivalent.
741         *
742         * @param charset The MIME standard name.
743         *
744         * @return The Java equivalent for this name.
745         */
746        public static String javaCharset(String charset) {
747            // nothing in, nothing out.
748            if (charset == null) {
749                return null;
750            }
751    
752            String mappedCharset = (String)mime2java.get(charset.toLowerCase());
753            // if there is no mapping, then the original name is used.  Many of the MIME character set
754            // names map directly back into Java.  The reverse isn't necessarily true.
755            return mappedCharset == null ? charset : mappedCharset;
756        }
757    
758        /**
759         * Map a Java character set name into the MIME equivalent.
760         *
761         * @param charset The java character set name.
762         *
763         * @return The MIME standard equivalent for this character set name.
764         */
765        public static String mimeCharset(String charset) {
766            // nothing in, nothing out.
767            if (charset == null) {
768                return null;
769            }
770    
771            String mappedCharset = (String)java2mime.get(charset.toLowerCase());
772            // if there is no mapping, then the original name is used.  Many of the MIME character set
773            // names map directly back into Java.  The reverse isn't necessarily true.
774            return mappedCharset == null ? charset : mappedCharset;
775        }
776    
777    
778        /**
779         * Get the default character set to use, in Java name format.
780         * This either be the value set with the mail.mime.charset
781         * system property or obtained from the file.encoding system
782         * property.  If neither of these is set, we fall back to
783         * 8859_1 (basically US-ASCII).
784         *
785         * @return The character string value of the default character set.
786         */
787        public static String getDefaultJavaCharset() {
788            String charset = SessionUtil.getProperty("mail.mime.charset");
789            if (charset != null) {
790                return javaCharset(charset);
791            }
792            return SessionUtil.getProperty("file.encoding", "8859_1");
793        }
794    
795        /**
796         * Get the default character set to use, in MIME name format.
797         * This either be the value set with the mail.mime.charset
798         * system property or obtained from the file.encoding system
799         * property.  If neither of these is set, we fall back to
800         * 8859_1 (basically US-ASCII).
801         *
802         * @return The character string value of the default character set.
803         */
804        static String getDefaultMIMECharset() {
805            // if the property is specified, this can be used directly.
806            String charset = SessionUtil.getProperty("mail.mime.charset");
807            if (charset != null) {
808                return charset;
809            }
810    
811            // get the Java-defined default and map back to a MIME name.
812            return mimeCharset(SessionUtil.getProperty("file.encoding", "8859_1"));
813        }
814    
815    
816        /**
817         * Load the default mapping tables used by the javaCharset()
818         * and mimeCharset() methods.  By default, these tables are
819         * loaded from the /META-INF/javamail.charset.map file.  If
820         * something goes wrong loading that file, we configure things
821         * with a default mapping table (which just happens to mimic
822         * what's in the default mapping file).
823         */
824        static private void loadCharacterSetMappings() {
825            java2mime = new HashMap();
826            mime2java = new HashMap();
827    
828    
829            // normally, these come from a character map file contained in the jar file.
830            try {
831                InputStream map = javax.mail.internet.MimeUtility.class.getResourceAsStream("/META-INF/javamail.charset.map");
832    
833                if (map != null) {
834                    // get a reader for this so we can load.
835                    BufferedReader reader = new BufferedReader(new InputStreamReader(map));
836    
837                    readMappings(reader, java2mime);
838                    readMappings(reader, mime2java);
839                }
840            } catch (Exception e) {
841            }
842    
843            // if any sort of error occurred reading the preferred file version, we could end up with empty
844            // mapping tables.  This could cause all sorts of difficulty, so ensure they are populated with at
845            // least a reasonable set of defaults.
846    
847            // these mappings echo what's in the default file.
848            if (java2mime.isEmpty()) {
849                java2mime.put("8859_1", "ISO-8859-1");
850                java2mime.put("iso8859_1", "ISO-8859-1");
851                java2mime.put("iso8859-1", "ISO-8859-1");
852    
853                java2mime.put("8859_2", "ISO-8859-2");
854                java2mime.put("iso8859_2", "ISO-8859-2");
855                java2mime.put("iso8859-2", "ISO-8859-2");
856    
857                java2mime.put("8859_3", "ISO-8859-3");
858                java2mime.put("iso8859_3", "ISO-8859-3");
859                java2mime.put("iso8859-3", "ISO-8859-3");
860    
861                java2mime.put("8859_4", "ISO-8859-4");
862                java2mime.put("iso8859_4", "ISO-8859-4");
863                java2mime.put("iso8859-4", "ISO-8859-4");
864    
865                java2mime.put("8859_5", "ISO-8859-5");
866                java2mime.put("iso8859_5", "ISO-8859-5");
867                java2mime.put("iso8859-5", "ISO-8859-5");
868    
869                java2mime.put ("8859_6", "ISO-8859-6");
870                java2mime.put("iso8859_6", "ISO-8859-6");
871                java2mime.put("iso8859-6", "ISO-8859-6");
872    
873                java2mime.put("8859_7", "ISO-8859-7");
874                java2mime.put("iso8859_7", "ISO-8859-7");
875                java2mime.put("iso8859-7", "ISO-8859-7");
876    
877                java2mime.put("8859_8", "ISO-8859-8");
878                java2mime.put("iso8859_8", "ISO-8859-8");
879                java2mime.put("iso8859-8", "ISO-8859-8");
880    
881                java2mime.put("8859_9", "ISO-8859-9");
882                java2mime.put("iso8859_9", "ISO-8859-9");
883                java2mime.put("iso8859-9", "ISO-8859-9");
884    
885                java2mime.put("sjis", "Shift_JIS");
886                java2mime.put ("jis", "ISO-2022-JP");
887                java2mime.put("iso2022jp", "ISO-2022-JP");
888                java2mime.put("euc_jp", "euc-jp");
889                java2mime.put("koi8_r", "koi8-r");
890                java2mime.put("euc_cn", "euc-cn");
891                java2mime.put("euc_tw", "euc-tw");
892                java2mime.put("euc_kr", "euc-kr");
893            }
894    
895            if (mime2java.isEmpty ()) {
896                mime2java.put("iso-2022-cn", "ISO2022CN");
897                mime2java.put("iso-2022-kr", "ISO2022KR");
898                mime2java.put("utf-8", "UTF8");
899                mime2java.put("utf8", "UTF8");
900                mime2java.put("ja_jp.iso2022-7", "ISO2022JP");
901                mime2java.put("ja_jp.eucjp", "EUCJIS");
902                mime2java.put ("euc-kr", "KSC5601");
903                mime2java.put("euckr", "KSC5601");
904                mime2java.put("us-ascii", "ISO-8859-1");
905                mime2java.put("x-us-ascii", "ISO-8859-1");
906            }
907        }
908    
909    
910        /**
911         * Read a section of a character map table and populate the
912         * target mapping table with the information.  The table end
913         * is marked by a line starting with "--" and also ending with
914         * "--".  Blank lines and comment lines (beginning with '#') are
915         * ignored.
916         *
917         * @param reader The source of the file information.
918         * @param table  The mapping table used to store the information.
919         */
920        static private void readMappings(BufferedReader reader, Map table) throws IOException {
921            // process lines to the EOF or the end of table marker.
922            while (true) {
923                String line = reader.readLine();
924                // no line returned is an EOF
925                if (line == null) {
926                    return;
927                }
928    
929                // trim so we're not messed up by trailing blanks
930                line = line.trim();
931    
932                if (line.length() == 0 || line.startsWith("#")) {
933                    continue;
934                }
935    
936                // stop processing if this is the end-of-table marker.
937                if (line.startsWith("--") && line.endsWith("--")) {
938                    return;
939                }
940    
941                // we allow either blanks or tabs as token delimiters.
942                StringTokenizer tokenizer = new StringTokenizer(line, " \t");
943    
944                try {
945                    String from = tokenizer.nextToken().toLowerCase();
946                    String to = tokenizer.nextToken();
947    
948                    table.put(from, to);
949                } catch (NoSuchElementException e) {
950                    // just ignore the line if invalid.
951                }
952            }
953        }
954    
955    
956    }
957    
958    
959    /**
960     * Utility class for examining content information written out
961     * by a DataHandler object.  This stream gathers statistics on
962     * the stream so it can make transfer encoding determinations.
963     */
964    class ContentCheckingOutputStream extends OutputStream {
965        private int asciiChars = 0;
966        private int nonAsciiChars = 0;
967        private boolean containsLongLines = false;
968        private boolean containsMalformedEOL = false;
969        private int previousChar = 0;
970        private int span = 0;
971    
972        ContentCheckingOutputStream() {
973        }
974    
975        public void write(byte[] data) throws IOException {
976            write(data, 0, data.length);
977        }
978    
979        public void write(byte[] data, int offset, int length) throws IOException {
980            for (int i = 0; i < length; i++) {
981                write(data[offset + i]);
982            }
983        }
984    
985        public void write(int ch) {
986            // we found a linebreak.  Reset the line length counters on either one.  We don't
987            // really need to validate here.
988            if (ch == '\n' || ch == '\r') {
989                // we found a newline, this is only valid if the previous char was the '\r'
990                if (ch == '\n') {
991                    // malformed linebreak?  force this to base64 encoding.
992                    if (previousChar != '\r') {
993                        containsMalformedEOL = true;
994                    }
995                }
996                // hit a line end, reset our line length counter
997                span = 0;
998            }
999            else {
1000                span++;
1001                // the text has long lines, we can't transfer this as unencoded text.
1002                if (span > 998) {
1003                    containsLongLines = true;
1004                }
1005    
1006                // non-ascii character, we have to transfer this in binary.
1007                if (!ASCIIUtil.isAscii(ch)) {
1008                    nonAsciiChars++;
1009                }
1010                else {
1011                    asciiChars++;
1012                }
1013            }
1014            previousChar = ch;
1015        }
1016    
1017    
1018        public String getBinaryTransferEncoding() {
1019            if (nonAsciiChars != 0 || containsLongLines || containsMalformedEOL) {
1020                return "base64";
1021            }
1022            else {
1023                return "7bit";
1024            }
1025        }
1026    
1027        public String getTextTransferEncoding() {
1028            // looking good so far, only valid chars here.
1029            if (nonAsciiChars == 0) {
1030                // does this contain long text lines?  We need to use a Q-P encoding which will
1031                // be only slightly longer, but handles folding the longer lines.
1032                if (containsLongLines) {
1033                    return "quoted-printable";
1034                }
1035                else {
1036                    // ideal!  Easiest one to handle.
1037                    return "7bit";
1038                }
1039            }
1040            else {
1041                // mostly characters requiring encoding?  Base64 is our best bet.
1042                if (nonAsciiChars > asciiChars) {
1043                    return "base64";
1044                }
1045                else {
1046                    // Q-P encoding will use fewer bytes than the full Base64.
1047                    return "quoted-printable";
1048                }
1049            }
1050        }
1051    }