001    /**
002     *
003     * Copyright 2003-2006 The Apache Software Foundation
004     *
005     *  Licensed under the Apache License, Version 2.0 (the "License");
006     *  you may not use this file except in compliance with the License.
007     *  You may obtain a copy of the License at
008     *
009     *     http://www.apache.org/licenses/LICENSE-2.0
010     *
011     *  Unless required by applicable law or agreed to in writing, software
012     *  distributed under the License is distributed on an "AS IS" BASIS,
013     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     *  See the License for the specific language governing permissions and
015     *  limitations under the License.
016     */
017    
018    package javax.mail.internet;
019    
020    import java.io.BufferedInputStream;
021    import java.io.BufferedReader;
022    import java.io.ByteArrayInputStream;
023    import java.io.ByteArrayOutputStream;
024    import java.io.IOException;
025    import java.io.InputStream;
026    import java.io.InputStreamReader;
027    import java.io.OutputStream;
028    import java.io.UnsupportedEncodingException;
029    import java.util.HashMap;
030    import java.util.Map;
031    import java.util.NoSuchElementException;
032    import java.util.StringTokenizer;
033    
034    import javax.activation.DataHandler;
035    import javax.activation.DataSource;
036    import javax.mail.MessagingException;
037    
038    import org.apache.geronimo.mail.util.ASCIIUtil;
039    import org.apache.geronimo.mail.util.Base64;
040    import org.apache.geronimo.mail.util.Base64DecoderStream;
041    import org.apache.geronimo.mail.util.Base64Encoder;
042    import org.apache.geronimo.mail.util.Base64EncoderStream;
043    import org.apache.geronimo.mail.util.QuotedPrintableDecoderStream;
044    import org.apache.geronimo.mail.util.QuotedPrintableEncoderStream;
045    import org.apache.geronimo.mail.util.QuotedPrintableEncoder;
046    import org.apache.geronimo.mail.util.QuotedPrintable;
047    import org.apache.geronimo.mail.util.SessionUtil;
048    import org.apache.geronimo.mail.util.UUDecoderStream;
049    import org.apache.geronimo.mail.util.UUEncoderStream;
050    
// Supported transfer encodings include "base64", "quoted-printable", "7bit", "8bit" and "binary".
// In addition, "uuencode" is also supported.
053    
054    /**
055     * @version $Rev: 421852 $ $Date: 2006-07-14 03:02:19 -0700 (Fri, 14 Jul 2006) $
056     */
057    public class MimeUtility {
058    
059        private static final String MIME_FOLDENCODEDWORDS = "mail.mime.foldencodedwords";
060        private static final String MIME_DECODE_TEXT_STRICT = "mail.mime.decodetext.strict";
061        private static final String MIME_FOLDTEXT = "mail.mime.foldtext";
062        private static final int FOLD_THRESHOLD = 76;
063    
064        private MimeUtility() {
065        }
066    
067        public static final int ALL = -1;
068    
069        private static String defaultJavaCharset;
070        private static String escapedChars = "\"\\\r\n";
071        private static String linearWhiteSpace = " \t\r\n";
072    
073        private static String QP_WORD_SPECIALS = "=_?\"#$%&'(),.:;<>@[\\]^`{|}~";
074        private static String QP_TEXT_SPECIALS = "=_?";
075    
076        // the javamail spec includes the ability to map java encoding names to MIME-specified names.  Normally,
077        // these values are loaded from a character mapping file.
078        private static Map java2mime;
079        private static Map mime2java;
080    
081        static {
082            // we need to load the mapping tables used by javaCharset() and mimeCharset().
083            loadCharacterSetMappings();
084        }
085    
086        public static InputStream decode(InputStream in, String encoding) throws MessagingException {
087            encoding = encoding.toLowerCase();
088    
089            // some encodies are just pass-throughs, with no real decoding.
090            if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
091                return in;
092            }
093            else if (encoding.equals("base64")) {
094                return new Base64DecoderStream(in);
095            }
096            // UUEncode is known by a couple historical extension names too.
097            else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
098                return new UUDecoderStream(in);
099            }
100            else if (encoding.equals("quoted-printable")) {
101                return new QuotedPrintableDecoderStream(in);
102            }
103            else {
104                throw new MessagingException("Unknown encoding " + encoding);
105            }
106        }
107    
108        /**
109         * Decode a string of text obtained from a mail header into
110         * it's proper form.  The text generally will consist of a
111         * string of tokens, some of which may be encoded using
112         * base64 encoding.
113         *
114         * @param text   The text to decode.
115         *
116         * @return The decoded test string.
117         * @exception UnsupportedEncodingException
118         */
119        public static String decodeText(String text) throws UnsupportedEncodingException {
120            // if the text contains any encoded tokens, those tokens will be marked with "=?".  If the
121            // source string doesn't contain that sequent, no decoding is required.
122            if (text.indexOf("=?") < 0) {
123                return text;
124            }
125    
126            // we have two sets of rules we can apply.
127            if (!SessionUtil.getBooleanProperty(MIME_DECODE_TEXT_STRICT, true)) {
128                return decodeTextNonStrict(text);
129            }
130    
131            int offset = 0;
132            int endOffset = text.length();
133    
134            int startWhiteSpace = -1;
135            int endWhiteSpace = -1;
136    
137            StringBuffer decodedText = new StringBuffer(text.length());
138    
139            boolean previousTokenEncoded = false;
140    
141            while (offset < endOffset) {
142                char ch = text.charAt(offset);
143    
144                // is this a whitespace character?
145                if (linearWhiteSpace.indexOf(ch) != -1) {
146                    startWhiteSpace = offset;
147                    while (offset < endOffset) {
148                        // step over the white space characters.
149                        ch = text.charAt(offset);
150                        if (linearWhiteSpace.indexOf(ch) != -1) {
151                            offset++;
152                        }
153                        else {
154                            // record the location of the first non lwsp and drop down to process the
155                            // token characters.
156                            endWhiteSpace = offset;
157                            break;
158                        }
159                    }
160                }
161                else {
162                    // we have a word token.  We need to scan over the word and then try to parse it.
163                    int wordStart = offset;
164    
165                    while (offset < endOffset) {
166                        // step over the white space characters.
167                        ch = text.charAt(offset);
168                        if (linearWhiteSpace.indexOf(ch) == -1) {
169                            offset++;
170                        }
171                        else {
172                            break;
173                        }
174    
175                        //NB:  Trailing whitespace on these header strings will just be discarded.
176                    }
177                    // pull out the word token.
178                    String word = text.substring(wordStart, offset);
179                    // is the token encoded?  decode the word
180                    if (word.startsWith("=?")) {
181                        try {
182                            // if this gives a parsing failure, treat it like a non-encoded word.
183                            String decodedWord = decodeWord(word);
184    
185                            // are any whitespace characters significant?  Append 'em if we've got 'em.
186                            if (!previousTokenEncoded) {
187                                if (startWhiteSpace != -1) {
188                                    decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
189                                    startWhiteSpace = -1;
190                                }
191                            }
192                            // this is definitely a decoded token.
193                            previousTokenEncoded = true;
194                            // and add this to the text.
195                            decodedText.append(decodedWord);
196                            // we continue parsing from here...we allow parsing errors to fall through
197                            // and get handled as normal text.
198                            continue;
199    
200                        } catch (ParseException e) {
201                        }
202                    }
203                    // this is a normal token, so it doesn't matter what the previous token was.  Add the white space
204                    // if we have it.
205                    if (startWhiteSpace != -1) {
206                        decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
207                        startWhiteSpace = -1;
208                    }
209                    // this is not a decoded token.
210                    previousTokenEncoded = false;
211                    decodedText.append(word);
212                }
213            }
214    
215            return decodedText.toString();
216        }
217    
218    
219        /**
220         * Decode a string of text obtained from a mail header into
221         * it's proper form.  The text generally will consist of a
222         * string of tokens, some of which may be encoded using
223         * base64 encoding.  This is for non-strict decoded for mailers that
224         * violate the RFC 2047 restriction that decoded tokens must be delimited
225         * by linear white space.  This will scan tokens looking for inner tokens
226         * enclosed in "=?" -- "?=" pairs.
227         *
228         * @param text   The text to decode.
229         *
230         * @return The decoded test string.
231         * @exception UnsupportedEncodingException
232         */
233        private static String decodeTextNonStrict(String text) throws UnsupportedEncodingException {
234            int offset = 0;
235            int endOffset = text.length();
236    
237            int startWhiteSpace = -1;
238            int endWhiteSpace = -1;
239    
240            StringBuffer decodedText = new StringBuffer(text.length());
241    
242            boolean previousTokenEncoded = false;
243    
244            while (offset < endOffset) {
245                char ch = text.charAt(offset);
246    
247                // is this a whitespace character?
248                if (linearWhiteSpace.indexOf(ch) != -1) {
249                    startWhiteSpace = offset;
250                    while (offset < endOffset) {
251                        // step over the white space characters.
252                        ch = text.charAt(offset);
253                        if (linearWhiteSpace.indexOf(ch) != -1) {
254                            offset++;
255                        }
256                        else {
257                            // record the location of the first non lwsp and drop down to process the
258                            // token characters.
259                            endWhiteSpace = offset;
260                            break;
261                        }
262                    }
263                }
264                else {
265                    // we're at the start of a word token.  We potentially need to break this up into subtokens
266                    int wordStart = offset;
267    
268                    while (offset < endOffset) {
269                        // step over the white space characters.
270                        ch = text.charAt(offset);
271                        if (linearWhiteSpace.indexOf(ch) == -1) {
272                            offset++;
273                        }
274                        else {
275                            break;
276                        }
277    
278                        //NB:  Trailing whitespace on these header strings will just be discarded.
279                    }
280                    // pull out the word token.
281                    String word = text.substring(wordStart, offset);
282    
283                    int decodeStart = 0;
284    
285                    // now scan and process each of the bits within here.
286                    while (decodeStart < word.length()) {
287                        int tokenStart = word.indexOf("=?", decodeStart);
288                        if (tokenStart == -1) {
289                            // this is a normal token, so it doesn't matter what the previous token was.  Add the white space
290                            // if we have it.
291                            if (startWhiteSpace != -1) {
292                                decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
293                                startWhiteSpace = -1;
294                            }
295                            // this is not a decoded token.
296                            previousTokenEncoded = false;
297                            decodedText.append(word.substring(decodeStart));
298                            // we're finished.
299                            break;
300                        }
301                        // we have something to process
302                        else {
303                            // we might have a normal token preceeding this.
304                            if (tokenStart != decodeStart) {
305                                // this is a normal token, so it doesn't matter what the previous token was.  Add the white space
306                                // if we have it.
307                                if (startWhiteSpace != -1) {
308                                    decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
309                                    startWhiteSpace = -1;
310                                }
311                                // this is not a decoded token.
312                                previousTokenEncoded = false;
313                                decodedText.append(word.substring(decodeStart, tokenStart));
314                            }
315    
316                            // now find the end marker.
317                            int tokenEnd = word.indexOf("?=", tokenStart);
318                            // sigh, an invalid token.  Treat this as plain text.
319                            if (tokenEnd == -1) {
320                                // this is a normal token, so it doesn't matter what the previous token was.  Add the white space
321                                // if we have it.
322                                if (startWhiteSpace != -1) {
323                                    decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
324                                    startWhiteSpace = -1;
325                                }
326                                // this is not a decoded token.
327                                previousTokenEncoded = false;
328                                decodedText.append(word.substring(tokenStart));
329                                // we're finished.
330                                break;
331                            }
332                            else {
333                                // update our ticker
334                                decodeStart = tokenEnd + 2;
335    
336                                String token = word.substring(tokenStart, tokenEnd);
337                                try {
338                                    // if this gives a parsing failure, treat it like a non-encoded word.
339                                    String decodedWord = decodeWord(token);
340    
341                                    // are any whitespace characters significant?  Append 'em if we've got 'em.
342                                    if (!previousTokenEncoded) {
343                                        if (startWhiteSpace != -1) {
344                                            decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
345                                            startWhiteSpace = -1;
346                                        }
347                                    }
348                                    // this is definitely a decoded token.
349                                    previousTokenEncoded = true;
350                                    // and add this to the text.
351                                    decodedText.append(decodedWord);
352                                    // we continue parsing from here...we allow parsing errors to fall through
353                                    // and get handled as normal text.
354                                    continue;
355    
356                                } catch (ParseException e) {
357                                }
358                                // this is a normal token, so it doesn't matter what the previous token was.  Add the white space
359                                // if we have it.
360                                if (startWhiteSpace != -1) {
361                                    decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
362                                    startWhiteSpace = -1;
363                                }
364                                // this is not a decoded token.
365                                previousTokenEncoded = false;
366                                decodedText.append(token);
367                            }
368                        }
369                    }
370                }
371            }
372    
373            return decodedText.toString();
374        }
375    
376        /**
377         * Parse a string using the RFC 2047 rules for an "encoded-word"
378         * type.  This encoding has the syntax:
379         *
380         * encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
381         *
382         * @param word   The possibly encoded word value.
383         *
384         * @return The decoded word.
385         * @exception ParseException
386         * @exception UnsupportedEncodingException
387         */
388        public static String decodeWord(String word) throws ParseException, UnsupportedEncodingException {
389            // encoded words start with the characters "=?".  If this not an encoded word, we throw a
390            // ParseException for the caller.
391    
392            if (!word.startsWith("=?")) {
393                throw new ParseException("Invalid RFC 2047 encoded-word: " + word);
394            }
395    
396            int charsetPos = word.indexOf('?', 2);
397            if (charsetPos == -1) {
398                throw new ParseException("Missing charset in RFC 2047 encoded-word: " + word);
399            }
400    
401            // pull out the character set information (this is the MIME name at this point).
402            String charset = word.substring(2, charsetPos).toLowerCase();
403    
404            // now pull out the encoding token the same way.
405            int encodingPos = word.indexOf('?', charsetPos + 1);
406            if (encodingPos == -1) {
407                throw new ParseException("Missing encoding in RFC 2047 encoded-word: " + word);
408            }
409    
410            String encoding = word.substring(charsetPos + 1, encodingPos);
411    
412            // and finally the encoded text.
413            int encodedTextPos = word.indexOf("?=", encodingPos + 1);
414            if (encodedTextPos == -1) {
415                throw new ParseException("Missing encoded text in RFC 2047 encoded-word: " + word);
416            }
417    
418            String encodedText = word.substring(encodingPos + 1, encodedTextPos);
419    
420            // seems a bit silly to encode a null string, but easy to deal with.
421            if (encodedText.length() == 0) {
422                return "";
423            }
424    
425            try {
426                // the decoder writes directly to an output stream.
427                ByteArrayOutputStream out = new ByteArrayOutputStream(encodedText.length());
428    
429                byte[] encodedData = encodedText.getBytes("US-ASCII");
430    
431                // Base64 encoded?
432                if (encoding.equals("B")) {
433                    Base64.decode(encodedData, out);
434                }
435                // maybe quoted printable.
436                else if (encoding.equals("Q")) {
437                    QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder();
438                    dataEncoder.decodeWord(encodedData, out);
439                }
440                else {
441                    throw new UnsupportedEncodingException("Unknown RFC 2047 encoding: " + encoding);
442                }
443                // get the decoded byte data and convert into a string.
444                byte[] decodedData = out.toByteArray();
445                return new String(decodedData, javaCharset(charset));
446            } catch (IOException e) {
447                throw new UnsupportedEncodingException("Invalid RFC 2047 encoding");
448            }
449    
450        }
451    
452        /**
453         * Wrap an encoder around a given output stream.
454         *
455         * @param out      The output stream to wrap.
456         * @param encoding The name of the encoding.
457         *
458         * @return A instance of FilterOutputStream that manages on the fly
459         *         encoding for the requested encoding type.
460         * @exception MessagingException
461         */
462        public static OutputStream encode(OutputStream out, String encoding) throws MessagingException {
463            // no encoding specified, so assume it goes out unchanged.
464            if (encoding == null) {
465                return out;
466            }
467    
468            encoding = encoding.toLowerCase();
469    
470            // some encodies are just pass-throughs, with no real decoding.
471            if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
472                return out;
473            }
474            else if (encoding.equals("base64")) {
475                return new Base64EncoderStream(out);
476            }
477            // UUEncode is known by a couple historical extension names too.
478            else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
479                return new UUEncoderStream(out);
480            }
481            else if (encoding.equals("quoted-printable")) {
482                return new QuotedPrintableEncoderStream(out);
483            }
484            else {
485                throw new MessagingException("Unknown encoding " + encoding);
486            }
487        }
488    
489        /**
490         * Wrap an encoder around a given output stream.
491         *
492         * @param out      The output stream to wrap.
493         * @param encoding The name of the encoding.
494         * @param filename The filename of the data being sent (only used for UUEncode).
495         *
496         * @return A instance of FilterOutputStream that manages on the fly
497         *         encoding for the requested encoding type.
498         * @exception MessagingException
499         */
500        public static OutputStream encode(OutputStream out, String encoding, String filename) throws MessagingException {
501            encoding = encoding.toLowerCase();
502    
503            // some encodies are just pass-throughs, with no real decoding.
504            if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
505                return out;
506            }
507            else if (encoding.equals("base64")) {
508                return new Base64EncoderStream(out);
509            }
510            // UUEncode is known by a couple historical extension names too.
511            else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
512                return new UUEncoderStream(out, filename);
513            }
514            else if (encoding.equals("quoted-printable")) {
515                 return new QuotedPrintableEncoderStream(out);
516            }
517            else {
518                throw new MessagingException("Unknown encoding " + encoding);
519            }
520        }
521    
522    
523        public static String encodeText(String word) throws UnsupportedEncodingException {
524            return encodeText(word, null, null);
525        }
526    
527        public static String encodeText(String word, String charset, String encoding) throws UnsupportedEncodingException {
528            return encodeWord(word, charset, encoding, false);
529        }
530    
531        public static String encodeWord(String word) throws UnsupportedEncodingException {
532            return encodeWord(word, null, null);
533        }
534    
535        public static String encodeWord(String word, String charset, String encoding) throws UnsupportedEncodingException {
536            return encodeWord(word, charset, encoding, true);
537        }
538    
539    
540        private static String encodeWord(String word, String charset, String encoding, boolean encodingWord) throws UnsupportedEncodingException {
541    
542            // figure out what we need to encode this.
543            String encoder = ASCIIUtil.getTextTransferEncoding(word);
544            // all ascii?  We can return this directly,
545            if (encoder.equals("7bit")) {
546                return word;
547            }
548    
549            // if not given a charset, use the default.
550            if (charset == null) {
551                charset = getDefaultMIMECharset();
552            }
553    
554            // sort out the encoder.  If not explicitly given, use the best guess we've already established.
555            if (encoding != null) {
556                if (encoding.equalsIgnoreCase("B")) {
557                    encoder = "base64";
558                }
559                else if (encoding.equalsIgnoreCase("Q")) {
560                    encoder = "quoted-printable";
561                }
562                else {
563                    throw new UnsupportedEncodingException("Unknown transfer encoding: " + encoding);
564                }
565            }
566    
567            try {
568                // get the string bytes in the correct source charset
569                InputStream in = new ByteArrayInputStream(word.getBytes( javaCharset(charset)));
570                ByteArrayOutputStream out = new ByteArrayOutputStream();
571    
572                if (encoder.equals("base64")) {
573                    Base64Encoder dataEncoder = new Base64Encoder();
574                    dataEncoder.encodeWord(in, charset, out, SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false));
575                }
576                else {
577                    QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder();
578                    dataEncoder.encodeWord(in, charset, encodingWord ? QP_WORD_SPECIALS : QP_TEXT_SPECIALS, out, SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false));
579                }
580    
581                byte[] bytes = out.toByteArray();
582                return new String(bytes);
583            } catch (IOException e) {
584                throw new UnsupportedEncodingException("Invalid encoding");
585            }
586        }
587    
588    
589        /**
590         * Examine the content of a data source and decide what type
591         * of transfer encoding should be used.  For text streams,
592         * we'll decided between 7bit, quoted-printable, and base64.
593         * For binary content types, we'll use either 7bit or base64.
594         *
595         * @param handler The DataHandler associated with the content.
596         *
597         * @return The string name of an encoding used to transfer the content.
598         */
599        public static String getEncoding(DataHandler handler) {
600    
601    
602            // if this handler has an associated data source, we can read directly from the
603            // data source to make this judgment.  This is generally MUCH faster than asking the
604            // DataHandler to write out the data for us.
605            DataSource ds = handler.getDataSource();
606            if (ds != null) {
607                return getEncoding(ds);
608            }
609    
610            try {
611                // get a parser that allows us to make comparisons.
612                ContentType content = new ContentType(ds.getContentType());
613    
614                // The only access to the content bytes at this point is by asking the handler to write
615                // the information out to a stream.  We're going to pipe this through a special stream
616                // that examines the bytes as they go by.
617                ContentCheckingOutputStream checker = new ContentCheckingOutputStream();
618    
619                handler.writeTo(checker);
620    
621                // figure this out based on whether we believe this to be a text type or not.
622                if (content.match("text/*")) {
623                    return checker.getTextTransferEncoding();
624                }
625                else {
626                    return checker.getBinaryTransferEncoding();
627                }
628    
629            } catch (Exception e) {
630                // any unexpected I/O exceptions we'll force to a "safe" fallback position.
631                return "base64";
632            }
633        }
634    
635    
636        /**
637         * Determine the what transfer encoding should be used for
638         * data retrieved from a DataSource.
639         *
640         * @param source The DataSource for the transmitted data.
641         *
642         * @return The string name of the encoding form that should be used for
643         *         the data.
644         */
645        public static String getEncoding(DataSource source) {
646            InputStream in = null;
647    
648            try {
649                // get a parser that allows us to make comparisons.
650                ContentType content = new ContentType(source.getContentType());
651    
652                // we're probably going to have to scan the data.
653                in = source.getInputStream();
654    
655                if (!content.match("text/*")) {
656                    // Not purporting to be a text type?  Examine the content to see we might be able to
657                    // at least pretend it is an ascii type.
658                    return ASCIIUtil.getBinaryTransferEncoding(in);
659                }
660                else {
661                    return ASCIIUtil.getTextTransferEncoding(in);
662                }
663            } catch (Exception e) {
664                // this was a problem...not sure what makes sense here, so we'll assume it's binary
665                // and we need to transfer this using Base64 encoding.
666                return "base64";
667            } finally {
668                // make sure we close the stream
669                try {
670                    if (in != null) {
671                        in.close();
672                    }
673                } catch (IOException e) {
674                }
675            }
676        }
677    
678    
679        /**
680         * Quote a "word" value.  If the word contains any character from
681         * the specified "specials" list, this value is returned as a
682         * quoted strong.  Otherwise, it is returned unchanged (an "atom").
683         *
684         * @param word     The word requiring quoting.
685         * @param specials The set of special characters that can't appear in an unquoted
686         *                 string.
687         *
688         * @return The quoted value.  This will be unchanged if the word doesn't contain
689         *         any of the designated special characters.
690         */
691        public static String quote(String word, String specials) {
692            int wordLength = word.length();
693            boolean requiresQuoting = false;
694            // scan the string looking for problem characters
695            for (int i =0; i < wordLength; i++) {
696                char ch = word.charAt(i);
697                // special escaped characters require escaping, which also implies quoting.
698                if (escapedChars.indexOf(ch) >= 0) {
699                    return quoteAndEscapeString(word);
700                }
701                // now check for control characters or the designated special characters.
702                if (ch < 32 || ch >= 127 || specials.indexOf(ch) >= 0) {
703                    // we know this requires quoting, but we still need to scan the entire string to
704                    // see if contains chars that require escaping.  Just go ahead and treat it as if it does.
705                    return quoteAndEscapeString(word);
706                }
707            }
708            return word;
709        }
710    
711        /**
712         * Take a string and return it as a formatted quoted string, with
713         * all characters requiring escaping handled properly.
714         *
715         * @param word   The string to quote.
716         *
717         * @return The quoted string.
718         */
719        private static String quoteAndEscapeString(String word) {
720            int wordLength = word.length();
721            // allocate at least enough for the string and two quotes plus a reasonable number of escaped chars.
722            StringBuffer buffer = new StringBuffer(wordLength + 10);
723            // add the leading quote.
724            buffer.append('"');
725    
726            for (int i = 0; i < wordLength; i++) {
727                char ch = word.charAt(i);
728                // is this an escaped char?
729                if (escapedChars.indexOf(ch) >= 0) {
730                    // add the escape marker before appending.
731                    buffer.append('\\');
732                }
733                buffer.append(ch);
734            }
735            // now the closing quote
736            buffer.append('"');
737            return buffer.toString();
738        }
739    
740        /**
741         * Translate a MIME standard character set name into the Java
742         * equivalent.
743         *
744         * @param charset The MIME standard name.
745         *
746         * @return The Java equivalent for this name.
747         */
748        public static String javaCharset(String charset) {
749            // nothing in, nothing out.
750            if (charset == null) {
751                return null;
752            }
753    
754            String mappedCharset = (String)mime2java.get(charset.toLowerCase());
755            // if there is no mapping, then the original name is used.  Many of the MIME character set
756            // names map directly back into Java.  The reverse isn't necessarily true.
757            return mappedCharset == null ? charset : mappedCharset;
758        }
759    
760        /**
761         * Map a Java character set name into the MIME equivalent.
762         *
763         * @param charset The java character set name.
764         *
765         * @return The MIME standard equivalent for this character set name.
766         */
767        public static String mimeCharset(String charset) {
768            // nothing in, nothing out.
769            if (charset == null) {
770                return null;
771            }
772    
773            String mappedCharset = (String)java2mime.get(charset.toLowerCase());
774            // if there is no mapping, then the original name is used.  Many of the MIME character set
775            // names map directly back into Java.  The reverse isn't necessarily true.
776            return mappedCharset == null ? charset : mappedCharset;
777        }
778    
779    
780        /**
781         * Get the default character set to use, in Java name format.
782         * This either be the value set with the mail.mime.charset
783         * system property or obtained from the file.encoding system
784         * property.  If neither of these is set, we fall back to
785         * 8859_1 (basically US-ASCII).
786         *
787         * @return The character string value of the default character set.
788         */
789        public static String getDefaultJavaCharset() {
790            String charset = SessionUtil.getProperty("mail.mime.charset");
791            if (charset != null) {
792                return javaCharset(charset);
793            }
794            return SessionUtil.getProperty("file.encoding", "8859_1");
795        }
796    
797        /**
798         * Get the default character set to use, in MIME name format.
799         * This either be the value set with the mail.mime.charset
800         * system property or obtained from the file.encoding system
801         * property.  If neither of these is set, we fall back to
802         * 8859_1 (basically US-ASCII).
803         *
804         * @return The character string value of the default character set.
805         */
806        static String getDefaultMIMECharset() {
807            // if the property is specified, this can be used directly.
808            String charset = SessionUtil.getProperty("mail.mime.charset");
809            if (charset != null) {
810                return charset;
811            }
812    
813            // get the Java-defined default and map back to a MIME name.
814            return mimeCharset(SessionUtil.getProperty("file.encoding", "8859_1"));
815        }
816    
817    
818        /**
819         * Load the default mapping tables used by the javaCharset()
820         * and mimeCharset() methods.  By default, these tables are
821         * loaded from the /META-INF/javamail.charset.map file.  If
822         * something goes wrong loading that file, we configure things
823         * with a default mapping table (which just happens to mimic
824         * what's in the default mapping file).
825         */
826        static private void loadCharacterSetMappings() {
827            java2mime = new HashMap();
828            mime2java = new HashMap();
829    
830    
831            // normally, these come from a character map file contained in the jar file.
832            try {
833                InputStream map = javax.mail.internet.MimeUtility.class.getResourceAsStream("/META-INF/javamail.charset.map");
834    
835                if (map != null) {
836                    // get a reader for this so we can load.
837                    BufferedReader reader = new BufferedReader(new InputStreamReader(map));
838    
839                    readMappings(reader, java2mime);
840                    readMappings(reader, mime2java);
841                }
842            } catch (Exception e) {
843            }
844    
845            // if any sort of error occurred reading the preferred file version, we could end up with empty
846            // mapping tables.  This could cause all sorts of difficulty, so ensure they are populated with at
847            // least a reasonable set of defaults.
848    
849            // these mappings echo what's in the default file.
850            if (java2mime.isEmpty()) {
851                java2mime.put("8859_1", "ISO-8859-1");
852                java2mime.put("iso8859_1", "ISO-8859-1");
853                java2mime.put("iso8859-1", "ISO-8859-1");
854    
855                java2mime.put("8859_2", "ISO-8859-2");
856                java2mime.put("iso8859_2", "ISO-8859-2");
857                java2mime.put("iso8859-2", "ISO-8859-2");
858    
859                java2mime.put("8859_3", "ISO-8859-3");
860                java2mime.put("iso8859_3", "ISO-8859-3");
861                java2mime.put("iso8859-3", "ISO-8859-3");
862    
863                java2mime.put("8859_4", "ISO-8859-4");
864                java2mime.put("iso8859_4", "ISO-8859-4");
865                java2mime.put("iso8859-4", "ISO-8859-4");
866    
867                java2mime.put("8859_5", "ISO-8859-5");
868                java2mime.put("iso8859_5", "ISO-8859-5");
869                java2mime.put("iso8859-5", "ISO-8859-5");
870    
871                java2mime.put ("8859_6", "ISO-8859-6");
872                java2mime.put("iso8859_6", "ISO-8859-6");
873                java2mime.put("iso8859-6", "ISO-8859-6");
874    
875                java2mime.put("8859_7", "ISO-8859-7");
876                java2mime.put("iso8859_7", "ISO-8859-7");
877                java2mime.put("iso8859-7", "ISO-8859-7");
878    
879                java2mime.put("8859_8", "ISO-8859-8");
880                java2mime.put("iso8859_8", "ISO-8859-8");
881                java2mime.put("iso8859-8", "ISO-8859-8");
882    
883                java2mime.put("8859_9", "ISO-8859-9");
884                java2mime.put("iso8859_9", "ISO-8859-9");
885                java2mime.put("iso8859-9", "ISO-8859-9");
886    
887                java2mime.put("sjis", "Shift_JIS");
888                java2mime.put ("jis", "ISO-2022-JP");
889                java2mime.put("iso2022jp", "ISO-2022-JP");
890                java2mime.put("euc_jp", "euc-jp");
891                java2mime.put("koi8_r", "koi8-r");
892                java2mime.put("euc_cn", "euc-cn");
893                java2mime.put("euc_tw", "euc-tw");
894                java2mime.put("euc_kr", "euc-kr");
895            }
896    
897            if (mime2java.isEmpty ()) {
898                mime2java.put("iso-2022-cn", "ISO2022CN");
899                mime2java.put("iso-2022-kr", "ISO2022KR");
900                mime2java.put("utf-8", "UTF8");
901                mime2java.put("utf8", "UTF8");
902                mime2java.put("ja_jp.iso2022-7", "ISO2022JP");
903                mime2java.put("ja_jp.eucjp", "EUCJIS");
904                mime2java.put ("euc-kr", "KSC5601");
905                mime2java.put("euckr", "KSC5601");
906                mime2java.put("us-ascii", "ISO-8859-1");
907                mime2java.put("x-us-ascii", "ISO-8859-1");
908            }
909        }
910    
911    
912        /**
913         * Read a section of a character map table and populate the
914         * target mapping table with the information.  The table end
915         * is marked by a line starting with "--" and also ending with
916         * "--".  Blank lines and comment lines (beginning with '#') are
917         * ignored.
918         *
919         * @param reader The source of the file information.
920         * @param table  The mapping table used to store the information.
921         */
922        static private void readMappings(BufferedReader reader, Map table) throws IOException {
923            // process lines to the EOF or the end of table marker.
924            while (true) {
925                String line = reader.readLine();
926                // no line returned is an EOF
927                if (line == null) {
928                    return;
929                }
930    
931                // trim so we're not messed up by trailing blanks
932                line = line.trim();
933    
934                if (line.length() == 0 || line.startsWith("#")) {
935                    continue;
936                }
937    
938                // stop processing if this is the end-of-table marker.
939                if (line.startsWith("--") && line.endsWith("--")) {
940                    return;
941                }
942    
943                // we allow either blanks or tabs as token delimiters.
944                StringTokenizer tokenizer = new StringTokenizer(line, " \t");
945    
946                try {
947                    String from = tokenizer.nextToken().toLowerCase();
948                    String to = tokenizer.nextToken();
949    
950                    table.put(from, to);
951                } catch (NoSuchElementException e) {
952                    // just ignore the line if invalid.
953                }
954            }
955        }
956    
957    
958        /**
959         * Perform RFC 2047 text folding on a string of text.
960         *
961         * @param used   The amount of text already "used up" on this line.  This is
962         *               typically the length of a message header that this text
963         *               get getting added to.
964         * @param s      The text to fold.
965         *
966         * @return The input text, with linebreaks inserted at appropriate fold points.
967         */
968        public static String fold(int used, String s) {
969            // if folding is disable, unfolding is also.  Return the string unchanged.
970            if (!SessionUtil.getBooleanProperty(MIME_FOLDTEXT, true)) {
971                return s;
972            }
973    
974            int end;
975    
976            // now we need to strip off any trailing "whitespace", where whitespace is blanks, tabs,
977            // and line break characters.
978            for (end = s.length() - 1; end >= 0; end--) {
979                int ch = s.charAt(end);
980                if (ch != ' ' && ch != '\t' ) {
981                    break;
982                }
983            }
984    
985            // did we actually find something to remove?  Shorten the String to the trimmed length
986            if (end != s.length() - 1) {
987                s = s.substring(0, end + 1);
988            }
989    
990            // does the string as it exists now not require folding?  We can just had that back right off.
991            if (s.length() + used <= FOLD_THRESHOLD) {
992                return s;
993            }
994    
995            // get a buffer for the length of the string, plus room for a few line breaks.
996            // these are soft line breaks, so we generally need more that just the line breaks (an escape +
997            // CR + LF + leading space on next line);
998            StringBuffer newString = new StringBuffer(s.length() + 8);
999    
1000    
1001            // now keep chopping this down until we've accomplished what we need.
1002            while (used + s.length() > FOLD_THRESHOLD) {
1003                int breakPoint = -1;
1004                char breakChar = 0;
1005    
1006                // now scan for the next place where we can break.
1007                for (int i = 0; i < s.length(); i++) {
1008                    // have we passed the fold limit?
1009                    if (used + i > FOLD_THRESHOLD) {
1010                        // if we've already seen a blank, then stop now.  Otherwise
1011                        // we keep going until we hit a fold point.
1012                        if (breakPoint != -1) {
1013                            break;
1014                        }
1015                    }
1016                    char ch = s.charAt(i);
1017    
1018                    // a white space character?
1019                    if (ch == ' ' || ch == '\t') {
1020                        // this might be a run of white space, so skip over those now.
1021                        breakPoint = i;
1022                        // we need to maintain the same character type after the inserted linebreak.
1023                        breakChar = ch;
1024                        i++;
1025                        while (i < s.length()) {
1026                            ch = s.charAt(i);
1027                            if (ch != ' ' && ch != '\t') {
1028                                break;
1029                            }
1030                            i++;
1031                        }
1032                    }
1033                    // found an embedded new line.  Escape this so that the unfolding process preserves it.
1034                    else if (ch == '\n') {
1035                        newString.append('\\');
1036                        newString.append('\n');
1037                    }
1038                    else if (ch == '\r') {
1039                        newString.append('\\');
1040                        newString.append('\n');
1041                        i++;
1042                        // if this is a CRLF pair, add the second char also
1043                        if (i < s.length() && s.charAt(i) == '\n') {
1044                            newString.append('\r');
1045                        }
1046                    }
1047    
1048                }
1049                // no fold point found, we punt, append the remainder and leave.
1050                if (breakPoint == -1) {
1051                    newString.append(s);
1052                    return newString.toString();
1053                }
1054                newString.append(s.substring(0, breakPoint));
1055                newString.append("\r\n");
1056                newString.append(breakChar);
1057                // chop the string
1058                s = s.substring(breakPoint + 1);
1059                // start again, and we've used the first char of the limit already with the whitespace char.
1060                used = 1;
1061            }
1062    
1063            // add on the remainder, and return
1064            newString.append(s);
1065            return newString.toString();
1066        }
1067    
1068        /**
1069         * Unfold a folded string.  The unfolding process will remove
1070         * any line breaks that are not escaped and which are also followed
1071         * by whitespace characters.
1072         *
1073         * @param s      The folded string.
1074         *
1075         * @return A new string with unfolding rules applied.
1076         */
1077        public static String unfold(String s) {
1078            // if folding is disable, unfolding is also.  Return the string unchanged.
1079            if (!SessionUtil.getBooleanProperty(MIME_FOLDTEXT, true)) {
1080                return s;
1081            }
1082    
1083            // if there are no line break characters in the string, we can just return this.
1084            if (s.indexOf('\n') < 0 && s.indexOf('\r') < 0) {
1085                return s;
1086            }
1087    
1088            // we need to scan and fix things up.
1089            int length = s.length();
1090    
1091            StringBuffer newString = new StringBuffer(length);
1092    
1093            // scan the entire string
1094            for (int i = 0; i < length; i++) {
1095                char ch = s.charAt(i);
1096    
1097                // we have a backslash.  In folded strings, escape characters are only processed as such if
1098                // they preceed line breaks.  Otherwise, we leave it be.
1099                if (ch == '\\') {
1100                    // escape at the very end?  Just add the character.
1101                    if (i == length - 1) {
1102                        newString.append(ch);
1103                    }
1104                    else {
1105                        int nextChar = s.charAt(i + 1);
1106    
1107                        // naked newline?  Add the new line to the buffer, and skip the escape char.
1108                        if (nextChar == '\n') {
1109                            newString.append('\n');
1110                            i++;
1111                        }
1112                        else if (nextChar == '\r') {
1113                            // just the CR left?  Add it, removing the escape.
1114                            if (i == length - 2 || s.charAt(i + 2) != '\r') {
1115                                newString.append('\r');
1116                                i++;
1117                            }
1118                            else {
1119                                // toss the escape, add both parts of the CRLF, and skip over two chars.
1120                                newString.append('\r');
1121                                newString.append('\n');
1122                                i += 2;
1123                            }
1124                        }
1125                        else {
1126                            // an escape for another purpose, just copy it over.
1127                            newString.append(ch);
1128                        }
1129                    }
1130                }
1131                // we have an unescaped line break
1132                else if (ch == '\n' || ch == '\r') {
1133                    // remember the position in case we need to backtrack.
1134                    int lineBreak = i;
1135                    boolean CRLF = false;
1136    
1137                    if (ch == '\r') {
1138                        // check to see if we need to step over this.
1139                        if (i < length - 1 && s.charAt(i + 1) == '\n') {
1140                            i++;
1141                            // flag the type so we know what we might need to preserve.
1142                            CRLF = true;
1143                        }
1144                    }
1145    
1146                    // get a temp position scanner.
1147                    int scan = i + 1;
1148    
1149                    // does a blank follow this new line?  we need to scrap the new line and reduce the leading blanks
1150                    // down to a single blank.
1151                    if (scan < length && s.charAt(scan) == ' ') {
1152                        // add the character
1153                        newString.append(' ');
1154    
1155                        // scan over the rest of the blanks
1156                        i = scan + 1;
1157                        while (i < length && s.charAt(i) == ' ') {
1158                            i++;
1159                        }
1160                        // we'll increment down below, so back up to the last blank as the current char.
1161                        i--;
1162                    }
1163                    else {
1164                        // we must keep this line break.  Append the appropriate style.
1165                        if (CRLF) {
1166                            newString.append("\r\n");
1167                        }
1168                        else {
1169                            newString.append(ch);
1170                        }
1171                    }
1172                }
1173                else {
1174                    // just a normal, ordinary character
1175                    newString.append(ch);
1176                }
1177            }
1178            return newString.toString();
1179        }
1180    }
1181    
1182    
1183    /**
1184     * Utility class for examining content information written out
1185     * by a DataHandler object.  This stream gathers statistics on
1186     * the stream so it can make transfer encoding determinations.
1187     */
1188    class ContentCheckingOutputStream extends OutputStream {
1189        private int asciiChars = 0;
1190        private int nonAsciiChars = 0;
1191        private boolean containsLongLines = false;
1192        private boolean containsMalformedEOL = false;
1193        private int previousChar = 0;
1194        private int span = 0;
1195    
1196        ContentCheckingOutputStream() {
1197        }
1198    
1199        public void write(byte[] data) throws IOException {
1200            write(data, 0, data.length);
1201        }
1202    
1203        public void write(byte[] data, int offset, int length) throws IOException {
1204            for (int i = 0; i < length; i++) {
1205                write(data[offset + i]);
1206            }
1207        }
1208    
1209        public void write(int ch) {
1210            // we found a linebreak.  Reset the line length counters on either one.  We don't
1211            // really need to validate here.
1212            if (ch == '\n' || ch == '\r') {
1213                // we found a newline, this is only valid if the previous char was the '\r'
1214                if (ch == '\n') {
1215                    // malformed linebreak?  force this to base64 encoding.
1216                    if (previousChar != '\r') {
1217                        containsMalformedEOL = true;
1218                    }
1219                }
1220                // hit a line end, reset our line length counter
1221                span = 0;
1222            }
1223            else {
1224                span++;
1225                // the text has long lines, we can't transfer this as unencoded text.
1226                if (span > 998) {
1227                    containsLongLines = true;
1228                }
1229    
1230                // non-ascii character, we have to transfer this in binary.
1231                if (!ASCIIUtil.isAscii(ch)) {
1232                    nonAsciiChars++;
1233                }
1234                else {
1235                    asciiChars++;
1236                }
1237            }
1238            previousChar = ch;
1239        }
1240    
1241    
1242        public String getBinaryTransferEncoding() {
1243            if (nonAsciiChars != 0 || containsLongLines || containsMalformedEOL) {
1244                return "base64";
1245            }
1246            else {
1247                return "7bit";
1248            }
1249        }
1250    
1251        public String getTextTransferEncoding() {
1252            // looking good so far, only valid chars here.
1253            if (nonAsciiChars == 0) {
1254                // does this contain long text lines?  We need to use a Q-P encoding which will
1255                // be only slightly longer, but handles folding the longer lines.
1256                if (containsLongLines) {
1257                    return "quoted-printable";
1258                }
1259                else {
1260                    // ideal!  Easiest one to handle.
1261                    return "7bit";
1262                }
1263            }
1264            else {
1265                // mostly characters requiring encoding?  Base64 is our best bet.
1266                if (nonAsciiChars > asciiChars) {
1267                    return "base64";
1268                }
1269                else {
1270                    // Q-P encoding will use fewer bytes than the full Base64.
1271                    return "quoted-printable";
1272                }
1273            }
1274        }
1275    }