001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *  http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing,
013     * software distributed under the License is distributed on an
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     * KIND, either express or implied.  See the License for the
016     * specific language governing permissions and limitations
017     * under the License.
018     */
019    
020    package javax.mail.internet;
021    
022    import java.io.BufferedInputStream;
023    import java.io.BufferedReader;
024    import java.io.ByteArrayInputStream;
025    import java.io.ByteArrayOutputStream;
026    import java.io.IOException;
027    import java.io.InputStream;
028    import java.io.InputStreamReader;
029    import java.io.OutputStream;
030    import java.io.UnsupportedEncodingException;
031    import java.util.HashMap;
032    import java.util.Map;
033    import java.util.NoSuchElementException;
034    import java.util.StringTokenizer;
035    
036    import javax.activation.DataHandler;
037    import javax.activation.DataSource;
038    import javax.mail.MessagingException;
039    
040    import org.apache.geronimo.mail.util.ASCIIUtil;
041    import org.apache.geronimo.mail.util.Base64;
042    import org.apache.geronimo.mail.util.Base64DecoderStream;
043    import org.apache.geronimo.mail.util.Base64Encoder;
044    import org.apache.geronimo.mail.util.Base64EncoderStream;
045    import org.apache.geronimo.mail.util.QuotedPrintableDecoderStream;
046    import org.apache.geronimo.mail.util.QuotedPrintableEncoderStream;
047    import org.apache.geronimo.mail.util.QuotedPrintableEncoder;
048    import org.apache.geronimo.mail.util.QuotedPrintable;
049    import org.apache.geronimo.mail.util.SessionUtil;
050    import org.apache.geronimo.mail.util.UUDecoderStream;
051    import org.apache.geronimo.mail.util.UUEncoderStream;
052    
// Supported transfer encodings include "base64", "quoted-printable", "7bit", "8bit" and "binary".
// In addition, "uuencode" is also supported.
055    
056    /**
057     * @version $Rev: 627556 $ $Date: 2008-02-13 13:27:22 -0500 (Wed, 13 Feb 2008) $
058     */
059    public class MimeUtility {
060    
061        private static final String MIME_FOLDENCODEDWORDS = "mail.mime.foldencodedwords";
062        private static final String MIME_DECODE_TEXT_STRICT = "mail.mime.decodetext.strict";
063        private static final String MIME_FOLDTEXT = "mail.mime.foldtext";
064        private static final int FOLD_THRESHOLD = 76;
065    
066        private MimeUtility() {
067        }
068    
069        public static final int ALL = -1;
070    
071        private static String defaultJavaCharset;
072        private static String escapedChars = "\"\\\r\n";
073        private static String linearWhiteSpace = " \t\r\n";
074    
075        private static String QP_WORD_SPECIALS = "=_?\"#$%&'(),.:;<>@[\\]^`{|}~";
076        private static String QP_TEXT_SPECIALS = "=_?";
077    
078        // the javamail spec includes the ability to map java encoding names to MIME-specified names.  Normally,
079        // these values are loaded from a character mapping file.
080        private static Map java2mime;
081        private static Map mime2java;
082    
083        static {
084            // we need to load the mapping tables used by javaCharset() and mimeCharset().
085            loadCharacterSetMappings();
086        }
087    
088        public static InputStream decode(InputStream in, String encoding) throws MessagingException {
089            encoding = encoding.toLowerCase();
090    
091            // some encodies are just pass-throughs, with no real decoding.
092            if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
093                return in;
094            }
095            else if (encoding.equals("base64")) {
096                return new Base64DecoderStream(in);
097            }
098            // UUEncode is known by a couple historical extension names too.
099            else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
100                return new UUDecoderStream(in);
101            }
102            else if (encoding.equals("quoted-printable")) {
103                return new QuotedPrintableDecoderStream(in);
104            }
105            else {
106                throw new MessagingException("Unknown encoding " + encoding);
107            }
108        }
109    
110        /**
111         * Decode a string of text obtained from a mail header into
112         * it's proper form.  The text generally will consist of a
113         * string of tokens, some of which may be encoded using
114         * base64 encoding.
115         *
116         * @param text   The text to decode.
117         *
118         * @return The decoded test string.
119         * @exception UnsupportedEncodingException
120         */
121        public static String decodeText(String text) throws UnsupportedEncodingException {
122            // if the text contains any encoded tokens, those tokens will be marked with "=?".  If the
123            // source string doesn't contain that sequent, no decoding is required.
124            if (text.indexOf("=?") < 0) {
125                return text;
126            }
127    
128            // we have two sets of rules we can apply.
129            if (!SessionUtil.getBooleanProperty(MIME_DECODE_TEXT_STRICT, true)) {
130                return decodeTextNonStrict(text);
131            }
132    
133            int offset = 0;
134            int endOffset = text.length();
135    
136            int startWhiteSpace = -1;
137            int endWhiteSpace = -1;
138    
139            StringBuffer decodedText = new StringBuffer(text.length());
140    
141            boolean previousTokenEncoded = false;
142    
143            while (offset < endOffset) {
144                char ch = text.charAt(offset);
145    
146                // is this a whitespace character?
147                if (linearWhiteSpace.indexOf(ch) != -1) {
148                    startWhiteSpace = offset;
149                    while (offset < endOffset) {
150                        // step over the white space characters.
151                        ch = text.charAt(offset);
152                        if (linearWhiteSpace.indexOf(ch) != -1) {
153                            offset++;
154                        }
155                        else {
156                            // record the location of the first non lwsp and drop down to process the
157                            // token characters.
158                            endWhiteSpace = offset;
159                            break;
160                        }
161                    }
162                }
163                else {
164                    // we have a word token.  We need to scan over the word and then try to parse it.
165                    int wordStart = offset;
166    
167                    while (offset < endOffset) {
168                        // step over the white space characters.
169                        ch = text.charAt(offset);
170                        if (linearWhiteSpace.indexOf(ch) == -1) {
171                            offset++;
172                        }
173                        else {
174                            break;
175                        }
176    
177                        //NB:  Trailing whitespace on these header strings will just be discarded.
178                    }
179                    // pull out the word token.
180                    String word = text.substring(wordStart, offset);
181                    // is the token encoded?  decode the word
182                    if (word.startsWith("=?")) {
183                        try {
184                            // if this gives a parsing failure, treat it like a non-encoded word.
185                            String decodedWord = decodeWord(word);
186    
187                            // are any whitespace characters significant?  Append 'em if we've got 'em.
188                            if (!previousTokenEncoded) {
189                                if (startWhiteSpace != -1) {
190                                    decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
191                                    startWhiteSpace = -1;
192                                }
193                            }
194                            // this is definitely a decoded token.
195                            previousTokenEncoded = true;
196                            // and add this to the text.
197                            decodedText.append(decodedWord);
198                            // we continue parsing from here...we allow parsing errors to fall through
199                            // and get handled as normal text.
200                            continue;
201    
202                        } catch (ParseException e) {
203                        }
204                    }
205                    // this is a normal token, so it doesn't matter what the previous token was.  Add the white space
206                    // if we have it.
207                    if (startWhiteSpace != -1) {
208                        decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
209                        startWhiteSpace = -1;
210                    }
211                    // this is not a decoded token.
212                    previousTokenEncoded = false;
213                    decodedText.append(word);
214                }
215            }
216    
217            return decodedText.toString();
218        }
219    
220    
221        /**
222         * Decode a string of text obtained from a mail header into
223         * it's proper form.  The text generally will consist of a
224         * string of tokens, some of which may be encoded using
225         * base64 encoding.  This is for non-strict decoded for mailers that
226         * violate the RFC 2047 restriction that decoded tokens must be delimited
227         * by linear white space.  This will scan tokens looking for inner tokens
228         * enclosed in "=?" -- "?=" pairs.
229         *
230         * @param text   The text to decode.
231         *
232         * @return The decoded test string.
233         * @exception UnsupportedEncodingException
234         */
235        private static String decodeTextNonStrict(String text) throws UnsupportedEncodingException {
236            int offset = 0;
237            int endOffset = text.length();
238    
239            int startWhiteSpace = -1;
240            int endWhiteSpace = -1;
241    
242            StringBuffer decodedText = new StringBuffer(text.length());
243    
244            boolean previousTokenEncoded = false;
245    
246            while (offset < endOffset) {
247                char ch = text.charAt(offset);
248    
249                // is this a whitespace character?
250                if (linearWhiteSpace.indexOf(ch) != -1) {
251                    startWhiteSpace = offset;
252                    while (offset < endOffset) {
253                        // step over the white space characters.
254                        ch = text.charAt(offset);
255                        if (linearWhiteSpace.indexOf(ch) != -1) {
256                            offset++;
257                        }
258                        else {
259                            // record the location of the first non lwsp and drop down to process the
260                            // token characters.
261                            endWhiteSpace = offset;
262                            break;
263                        }
264                    }
265                }
266                else {
267                    // we're at the start of a word token.  We potentially need to break this up into subtokens
268                    int wordStart = offset;
269    
270                    while (offset < endOffset) {
271                        // step over the white space characters.
272                        ch = text.charAt(offset);
273                        if (linearWhiteSpace.indexOf(ch) == -1) {
274                            offset++;
275                        }
276                        else {
277                            break;
278                        }
279    
280                        //NB:  Trailing whitespace on these header strings will just be discarded.
281                    }
282                    // pull out the word token.
283                    String word = text.substring(wordStart, offset);
284    
285                    int decodeStart = 0;
286    
287                    // now scan and process each of the bits within here.
288                    while (decodeStart < word.length()) {
289                        int tokenStart = word.indexOf("=?", decodeStart);
290                        if (tokenStart == -1) {
291                            // this is a normal token, so it doesn't matter what the previous token was.  Add the white space
292                            // if we have it.
293                            if (startWhiteSpace != -1) {
294                                decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
295                                startWhiteSpace = -1;
296                            }
297                            // this is not a decoded token.
298                            previousTokenEncoded = false;
299                            decodedText.append(word.substring(decodeStart));
300                            // we're finished.
301                            break;
302                        }
303                        // we have something to process
304                        else {
305                            // we might have a normal token preceeding this.
306                            if (tokenStart != decodeStart) {
307                                // this is a normal token, so it doesn't matter what the previous token was.  Add the white space
308                                // if we have it.
309                                if (startWhiteSpace != -1) {
310                                    decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
311                                    startWhiteSpace = -1;
312                                }
313                                // this is not a decoded token.
314                                previousTokenEncoded = false;
315                                decodedText.append(word.substring(decodeStart, tokenStart));
316                            }
317    
318                            // now find the end marker.
319                            int tokenEnd = word.indexOf("?=", tokenStart);
320                            // sigh, an invalid token.  Treat this as plain text.
321                            if (tokenEnd == -1) {
322                                // this is a normal token, so it doesn't matter what the previous token was.  Add the white space
323                                // if we have it.
324                                if (startWhiteSpace != -1) {
325                                    decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
326                                    startWhiteSpace = -1;
327                                }
328                                // this is not a decoded token.
329                                previousTokenEncoded = false;
330                                decodedText.append(word.substring(tokenStart));
331                                // we're finished.
332                                break;
333                            }
334                            else {
335                                // update our ticker
336                                decodeStart = tokenEnd + 2;
337    
338                                String token = word.substring(tokenStart, tokenEnd);
339                                try {
340                                    // if this gives a parsing failure, treat it like a non-encoded word.
341                                    String decodedWord = decodeWord(token);
342    
343                                    // are any whitespace characters significant?  Append 'em if we've got 'em.
344                                    if (!previousTokenEncoded) {
345                                        if (startWhiteSpace != -1) {
346                                            decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
347                                            startWhiteSpace = -1;
348                                        }
349                                    }
350                                    // this is definitely a decoded token.
351                                    previousTokenEncoded = true;
352                                    // and add this to the text.
353                                    decodedText.append(decodedWord);
354                                    // we continue parsing from here...we allow parsing errors to fall through
355                                    // and get handled as normal text.
356                                    continue;
357    
358                                } catch (ParseException e) {
359                                }
360                                // this is a normal token, so it doesn't matter what the previous token was.  Add the white space
361                                // if we have it.
362                                if (startWhiteSpace != -1) {
363                                    decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
364                                    startWhiteSpace = -1;
365                                }
366                                // this is not a decoded token.
367                                previousTokenEncoded = false;
368                                decodedText.append(token);
369                            }
370                        }
371                    }
372                }
373            }
374    
375            return decodedText.toString();
376        }
377    
378        /**
379         * Parse a string using the RFC 2047 rules for an "encoded-word"
380         * type.  This encoding has the syntax:
381         *
382         * encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
383         *
384         * @param word   The possibly encoded word value.
385         *
386         * @return The decoded word.
387         * @exception ParseException
388         * @exception UnsupportedEncodingException
389         */
390        public static String decodeWord(String word) throws ParseException, UnsupportedEncodingException {
391            // encoded words start with the characters "=?".  If this not an encoded word, we throw a
392            // ParseException for the caller.
393    
394            if (!word.startsWith("=?")) {
395                throw new ParseException("Invalid RFC 2047 encoded-word: " + word);
396            }
397    
398            int charsetPos = word.indexOf('?', 2);
399            if (charsetPos == -1) {
400                throw new ParseException("Missing charset in RFC 2047 encoded-word: " + word);
401            }
402    
403            // pull out the character set information (this is the MIME name at this point).
404            String charset = word.substring(2, charsetPos).toLowerCase();
405    
406            // now pull out the encoding token the same way.
407            int encodingPos = word.indexOf('?', charsetPos + 1);
408            if (encodingPos == -1) {
409                throw new ParseException("Missing encoding in RFC 2047 encoded-word: " + word);
410            }
411    
412            String encoding = word.substring(charsetPos + 1, encodingPos);
413    
414            // and finally the encoded text.
415            int encodedTextPos = word.indexOf("?=", encodingPos + 1);
416            if (encodedTextPos == -1) {
417                throw new ParseException("Missing encoded text in RFC 2047 encoded-word: " + word);
418            }
419    
420            String encodedText = word.substring(encodingPos + 1, encodedTextPos);
421    
422            // seems a bit silly to encode a null string, but easy to deal with.
423            if (encodedText.length() == 0) {
424                return "";
425            }
426    
427            try {
428                // the decoder writes directly to an output stream.
429                ByteArrayOutputStream out = new ByteArrayOutputStream(encodedText.length());
430    
431                byte[] encodedData = encodedText.getBytes("US-ASCII");
432    
433                // Base64 encoded?
434                if (encoding.equals("B")) {
435                    Base64.decode(encodedData, out);
436                }
437                // maybe quoted printable.
438                else if (encoding.equals("Q")) {
439                    QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder();
440                    dataEncoder.decodeWord(encodedData, out);
441                }
442                else {
443                    throw new UnsupportedEncodingException("Unknown RFC 2047 encoding: " + encoding);
444                }
445                // get the decoded byte data and convert into a string.
446                byte[] decodedData = out.toByteArray();
447                return new String(decodedData, javaCharset(charset));
448            } catch (IOException e) {
449                throw new UnsupportedEncodingException("Invalid RFC 2047 encoding");
450            }
451    
452        }
453    
454        /**
455         * Wrap an encoder around a given output stream.
456         *
457         * @param out      The output stream to wrap.
458         * @param encoding The name of the encoding.
459         *
460         * @return A instance of FilterOutputStream that manages on the fly
461         *         encoding for the requested encoding type.
462         * @exception MessagingException
463         */
464        public static OutputStream encode(OutputStream out, String encoding) throws MessagingException {
465            // no encoding specified, so assume it goes out unchanged.
466            if (encoding == null) {
467                return out;
468            }
469    
470            encoding = encoding.toLowerCase();
471    
472            // some encodies are just pass-throughs, with no real decoding.
473            if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
474                return out;
475            }
476            else if (encoding.equals("base64")) {
477                return new Base64EncoderStream(out);
478            }
479            // UUEncode is known by a couple historical extension names too.
480            else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
481                return new UUEncoderStream(out);
482            }
483            else if (encoding.equals("quoted-printable")) {
484                return new QuotedPrintableEncoderStream(out);
485            }
486            else {
487                throw new MessagingException("Unknown encoding " + encoding);
488            }
489        }
490    
491        /**
492         * Wrap an encoder around a given output stream.
493         *
494         * @param out      The output stream to wrap.
495         * @param encoding The name of the encoding.
496         * @param filename The filename of the data being sent (only used for UUEncode).
497         *
498         * @return A instance of FilterOutputStream that manages on the fly
499         *         encoding for the requested encoding type.
500         * @exception MessagingException
501         */
502        public static OutputStream encode(OutputStream out, String encoding, String filename) throws MessagingException {
503            encoding = encoding.toLowerCase();
504    
505            // some encodies are just pass-throughs, with no real decoding.
506            if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
507                return out;
508            }
509            else if (encoding.equals("base64")) {
510                return new Base64EncoderStream(out);
511            }
512            // UUEncode is known by a couple historical extension names too.
513            else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
514                return new UUEncoderStream(out, filename);
515            }
516            else if (encoding.equals("quoted-printable")) {
517                 return new QuotedPrintableEncoderStream(out);
518            }
519            else {
520                throw new MessagingException("Unknown encoding " + encoding);
521            }
522        }
523    
524    
525        public static String encodeText(String word) throws UnsupportedEncodingException {
526            return encodeText(word, null, null);
527        }
528    
529        public static String encodeText(String word, String charset, String encoding) throws UnsupportedEncodingException {
530            return encodeWord(word, charset, encoding, false);
531        }
532    
533        public static String encodeWord(String word) throws UnsupportedEncodingException {
534            return encodeWord(word, null, null);
535        }
536    
537        public static String encodeWord(String word, String charset, String encoding) throws UnsupportedEncodingException {
538            return encodeWord(word, charset, encoding, true);
539        }
540    
541    
542        private static String encodeWord(String word, String charset, String encoding, boolean encodingWord) throws UnsupportedEncodingException {
543    
544            // figure out what we need to encode this.
545            String encoder = ASCIIUtil.getTextTransferEncoding(word);
546            // all ascii?  We can return this directly,
547            if (encoder.equals("7bit")) {
548                return word;
549            }
550    
551            // if not given a charset, use the default.
552            if (charset == null) {
553                charset = getDefaultMIMECharset();
554            }
555    
556            // sort out the encoder.  If not explicitly given, use the best guess we've already established.
557            if (encoding != null) {
558                if (encoding.equalsIgnoreCase("B")) {
559                    encoder = "base64";
560                }
561                else if (encoding.equalsIgnoreCase("Q")) {
562                    encoder = "quoted-printable";
563                }
564                else {
565                    throw new UnsupportedEncodingException("Unknown transfer encoding: " + encoding);
566                }
567            }
568    
569            try {
570                
571                // we'll format this directly into the string buffer 
572                StringBuffer result = new StringBuffer(); 
573                
574                // this is the maximum size of a segment of encoded data, which is based off 
575                // of a 75 character size limit and all of the encoding overhead elements.
576                int sizeLimit = 75 - 7 - charset.length();
577                
578                // now do the appropriate encoding work 
579                if (encoder.equals("base64")) {
580                    Base64Encoder dataEncoder = new Base64Encoder();
581                    // this may recurse on the encoding if the string is too long.  The left-most will not 
582                    // get a segment delimiter 
583                    encodeBase64(word, result, sizeLimit, charset, dataEncoder, true, SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false)); 
584                }
585                else {
586                    QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder();
587                    encodeQuotedPrintable(word, result, sizeLimit, charset, dataEncoder, true, 
588                        SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false), encodingWord ? QP_WORD_SPECIALS : QP_TEXT_SPECIALS); 
589                }
590                return result.toString();    
591            } catch (IOException e) {
592                throw new UnsupportedEncodingException("Invalid encoding");
593            }
594        }
595        
596        
597        /**
598         * Encode a string into base64 encoding, taking into 
599         * account the maximum segment length. 
600         * 
601         * @param data      The string data to encode.
602         * @param out       The output buffer used for the result.
603         * @param sizeLimit The maximum amount of encoded data we're allowed
604         *                  to have in a single encoded segment.
605         * @param charset   The character set marker that needs to be added to the
606         *                  encoding header.
607         * @param encoder   The encoder instance we're using.
608         * @param firstSegment
609         *                  If true, this is the first (left-most) segment in the
610         *                  data.  Used to determine if segment delimiters need to
611         *                  be added between sections.
612         * @param foldSegments
613         *                  Indicates the type of delimiter to use (blank or newline sequence).
614         */
615        static private void encodeBase64(String data, StringBuffer out, int sizeLimit, String charset, Base64Encoder encoder, boolean firstSegment, boolean foldSegments) throws IOException
616        {
617            // this needs to be converted into the appropriate transfer encoding. 
618            byte [] bytes = data.getBytes(javaCharset(charset)); 
619            
620            int estimatedSize = encoder.estimateEncodedLength(bytes); 
621            
622            // if the estimated encoding size is over our segment limit, split the string in half and 
623            // recurse.  Eventually we'll reach a point where things are small enough.  
624            if (estimatedSize > sizeLimit) {
625                // the first segment indicator travels with the left half. 
626                encodeBase64(data.substring(0, data.length() / 2), out, sizeLimit, charset, encoder, firstSegment, foldSegments);
627                // the second half can never be the first segment 
628                encodeBase64(data.substring(data.length() / 2), out, sizeLimit, charset, encoder, false, foldSegments);
629            }
630            else 
631            {
632                // if this is not the first sement of the encoding, we need to add either a blank or 
633                // a newline sequence to the data 
634                if (!firstSegment) {
635                    if (foldSegments) {
636                        out.append("\r\n"); 
637                    }
638                    else {
639                        out.append(' '); 
640                    }
641                }
642                // do the encoding of the segment.
643                encoder.encodeWord(bytes, out, charset);
644            }
645        }
646        
647        
648        /**
649         * Encode a string into quoted printable encoding, taking into 
650         * account the maximum segment length. 
651         * 
652         * @param data      The string data to encode.
653         * @param out       The output buffer used for the result.
654         * @param sizeLimit The maximum amount of encoded data we're allowed
655         *                  to have in a single encoded segment.
656         * @param charset   The character set marker that needs to be added to the
657         *                  encoding header.
658         * @param encoder   The encoder instance we're using.
659         * @param firstSegment
660         *                  If true, this is the first (left-most) segment in the
661         *                  data.  Used to determine if segment delimiters need to
662         *                  be added between sections.
663         * @param foldSegments
664         *                  Indicates the type of delimiter to use (blank or newline sequence).
665         */
666        static private void encodeQuotedPrintable(String data, StringBuffer out, int sizeLimit, String charset, QuotedPrintableEncoder encoder, 
667            boolean firstSegment, boolean foldSegments, String specials)  throws IOException 
668        {
669            // this needs to be converted into the appropriate transfer encoding. 
670            byte [] bytes = data.getBytes(javaCharset(charset)); 
671            
672            int estimatedSize = encoder.estimateEncodedLength(bytes, specials); 
673            
674            // if the estimated encoding size is over our segment limit, split the string in half and 
675            // recurse.  Eventually we'll reach a point where things are small enough.  
676            if (estimatedSize > sizeLimit) {
677                // the first segment indicator travels with the left half. 
678                encodeQuotedPrintable(data.substring(0, data.length() / 2), out, sizeLimit, charset, encoder, firstSegment, foldSegments, specials);
679                // the second half can never be the first segment 
680                encodeQuotedPrintable(data.substring(data.length() / 2), out, sizeLimit, charset, encoder, false, foldSegments, specials);
681            }
682            else 
683            {
684                // if this is not the first sement of the encoding, we need to add either a blank or 
685                // a newline sequence to the data 
686                if (!firstSegment) {
687                    if (foldSegments) {
688                        out.append("\r\n"); 
689                    }
690                    else {
691                        out.append(' '); 
692                    }
693                }
694                // do the encoding of the segment.
695                encoder.encodeWord(bytes, out, charset, specials);
696            }
697        }
698    
699    
700        /**
701         * Examine the content of a data source and decide what type
702         * of transfer encoding should be used.  For text streams,
703         * we'll decided between 7bit, quoted-printable, and base64.
704         * For binary content types, we'll use either 7bit or base64.
705         *
706         * @param handler The DataHandler associated with the content.
707         *
708         * @return The string name of an encoding used to transfer the content.
709         */
710        public static String getEncoding(DataHandler handler) {
711    
712    
713            // if this handler has an associated data source, we can read directly from the
714            // data source to make this judgment.  This is generally MUCH faster than asking the
715            // DataHandler to write out the data for us.
716            DataSource ds = handler.getDataSource();
717            if (ds != null) {
718                return getEncoding(ds);
719            }
720    
721            try {
722                // get a parser that allows us to make comparisons.
723                ContentType content = new ContentType(ds.getContentType());
724    
725                // The only access to the content bytes at this point is by asking the handler to write
726                // the information out to a stream.  We're going to pipe this through a special stream
727                // that examines the bytes as they go by.
728                ContentCheckingOutputStream checker = new ContentCheckingOutputStream();
729    
730                handler.writeTo(checker);
731    
732                // figure this out based on whether we believe this to be a text type or not.
733                if (content.match("text/*")) {
734                    return checker.getTextTransferEncoding();
735                }
736                else {
737                    return checker.getBinaryTransferEncoding();
738                }
739    
740            } catch (Exception e) {
741                // any unexpected I/O exceptions we'll force to a "safe" fallback position.
742                return "base64";
743            }
744        }
745    
746    
747        /**
748         * Determine the what transfer encoding should be used for
749         * data retrieved from a DataSource.
750         *
751         * @param source The DataSource for the transmitted data.
752         *
753         * @return The string name of the encoding form that should be used for
754         *         the data.
755         */
756        public static String getEncoding(DataSource source) {
757            InputStream in = null;
758    
759            try {
760                // get a parser that allows us to make comparisons.
761                ContentType content = new ContentType(source.getContentType());
762    
763                // we're probably going to have to scan the data.
764                in = source.getInputStream();
765    
766                if (!content.match("text/*")) {
767                    // Not purporting to be a text type?  Examine the content to see we might be able to
768                    // at least pretend it is an ascii type.
769                    return ASCIIUtil.getBinaryTransferEncoding(in);
770                }
771                else {
772                    return ASCIIUtil.getTextTransferEncoding(in);
773                }
774            } catch (Exception e) {
775                // this was a problem...not sure what makes sense here, so we'll assume it's binary
776                // and we need to transfer this using Base64 encoding.
777                return "base64";
778            } finally {
779                // make sure we close the stream
780                try {
781                    if (in != null) {
782                        in.close();
783                    }
784                } catch (IOException e) {
785                }
786            }
787        }
788    
789    
790        /**
791         * Quote a "word" value.  If the word contains any character from
792         * the specified "specials" list, this value is returned as a
793         * quoted strong.  Otherwise, it is returned unchanged (an "atom").
794         *
795         * @param word     The word requiring quoting.
796         * @param specials The set of special characters that can't appear in an unquoted
797         *                 string.
798         *
799         * @return The quoted value.  This will be unchanged if the word doesn't contain
800         *         any of the designated special characters.
801         */
802        public static String quote(String word, String specials) {
803            int wordLength = word.length();
804            boolean requiresQuoting = false;
805            // scan the string looking for problem characters
806            for (int i =0; i < wordLength; i++) {
807                char ch = word.charAt(i);
808                // special escaped characters require escaping, which also implies quoting.
809                if (escapedChars.indexOf(ch) >= 0) {
810                    return quoteAndEscapeString(word);
811                }
812                // now check for control characters or the designated special characters.
813                if (ch < 32 || ch >= 127 || specials.indexOf(ch) >= 0) {
814                    // we know this requires quoting, but we still need to scan the entire string to
815                    // see if contains chars that require escaping.  Just go ahead and treat it as if it does.
816                    return quoteAndEscapeString(word);
817                }
818            }
819            return word;
820        }
821    
822        /**
823         * Take a string and return it as a formatted quoted string, with
824         * all characters requiring escaping handled properly.
825         *
826         * @param word   The string to quote.
827         *
828         * @return The quoted string.
829         */
830        private static String quoteAndEscapeString(String word) {
831            int wordLength = word.length();
832            // allocate at least enough for the string and two quotes plus a reasonable number of escaped chars.
833            StringBuffer buffer = new StringBuffer(wordLength + 10);
834            // add the leading quote.
835            buffer.append('"');
836    
837            for (int i = 0; i < wordLength; i++) {
838                char ch = word.charAt(i);
839                // is this an escaped char?
840                if (escapedChars.indexOf(ch) >= 0) {
841                    // add the escape marker before appending.
842                    buffer.append('\\');
843                }
844                buffer.append(ch);
845            }
846            // now the closing quote
847            buffer.append('"');
848            return buffer.toString();
849        }
850    
851        /**
852         * Translate a MIME standard character set name into the Java
853         * equivalent.
854         *
855         * @param charset The MIME standard name.
856         *
857         * @return The Java equivalent for this name.
858         */
859        public static String javaCharset(String charset) {
860            // nothing in, nothing out.
861            if (charset == null) {
862                return null;
863            }
864    
865            String mappedCharset = (String)mime2java.get(charset.toLowerCase());
866            // if there is no mapping, then the original name is used.  Many of the MIME character set
867            // names map directly back into Java.  The reverse isn't necessarily true.
868            return mappedCharset == null ? charset : mappedCharset;
869        }
870    
871        /**
872         * Map a Java character set name into the MIME equivalent.
873         *
874         * @param charset The java character set name.
875         *
876         * @return The MIME standard equivalent for this character set name.
877         */
878        public static String mimeCharset(String charset) {
879            // nothing in, nothing out.
880            if (charset == null) {
881                return null;
882            }
883    
884            String mappedCharset = (String)java2mime.get(charset.toLowerCase());
885            // if there is no mapping, then the original name is used.  Many of the MIME character set
886            // names map directly back into Java.  The reverse isn't necessarily true.
887            return mappedCharset == null ? charset : mappedCharset;
888        }
889    
890    
891        /**
892         * Get the default character set to use, in Java name format.
893         * This either be the value set with the mail.mime.charset
894         * system property or obtained from the file.encoding system
895         * property.  If neither of these is set, we fall back to
896         * 8859_1 (basically US-ASCII).
897         *
898         * @return The character string value of the default character set.
899         */
900        public static String getDefaultJavaCharset() {
901            String charset = SessionUtil.getProperty("mail.mime.charset");
902            if (charset != null) {
903                return javaCharset(charset);
904            }
905            return SessionUtil.getProperty("file.encoding", "8859_1");
906        }
907    
908        /**
909         * Get the default character set to use, in MIME name format.
910         * This either be the value set with the mail.mime.charset
911         * system property or obtained from the file.encoding system
912         * property.  If neither of these is set, we fall back to
913         * 8859_1 (basically US-ASCII).
914         *
915         * @return The character string value of the default character set.
916         */
917        static String getDefaultMIMECharset() {
918            // if the property is specified, this can be used directly.
919            String charset = SessionUtil.getProperty("mail.mime.charset");
920            if (charset != null) {
921                return charset;
922            }
923    
924            // get the Java-defined default and map back to a MIME name.
925            return mimeCharset(SessionUtil.getProperty("file.encoding", "8859_1"));
926        }
927    
928    
929        /**
930         * Load the default mapping tables used by the javaCharset()
931         * and mimeCharset() methods.  By default, these tables are
932         * loaded from the /META-INF/javamail.charset.map file.  If
933         * something goes wrong loading that file, we configure things
934         * with a default mapping table (which just happens to mimic
935         * what's in the default mapping file).
936         */
937        static private void loadCharacterSetMappings() {
938            java2mime = new HashMap();
939            mime2java = new HashMap();
940    
941    
942            // normally, these come from a character map file contained in the jar file.
943            try {
944                InputStream map = javax.mail.internet.MimeUtility.class.getResourceAsStream("/META-INF/javamail.charset.map");
945    
946                if (map != null) {
947                    // get a reader for this so we can load.
948                    BufferedReader reader = new BufferedReader(new InputStreamReader(map));
949    
950                    readMappings(reader, java2mime);
951                    readMappings(reader, mime2java);
952                }
953            } catch (Exception e) {
954            }
955    
956            // if any sort of error occurred reading the preferred file version, we could end up with empty
957            // mapping tables.  This could cause all sorts of difficulty, so ensure they are populated with at
958            // least a reasonable set of defaults.
959    
960            // these mappings echo what's in the default file.
961            if (java2mime.isEmpty()) {
962                java2mime.put("8859_1", "ISO-8859-1");
963                java2mime.put("iso8859_1", "ISO-8859-1");
964                java2mime.put("iso8859-1", "ISO-8859-1");
965    
966                java2mime.put("8859_2", "ISO-8859-2");
967                java2mime.put("iso8859_2", "ISO-8859-2");
968                java2mime.put("iso8859-2", "ISO-8859-2");
969    
970                java2mime.put("8859_3", "ISO-8859-3");
971                java2mime.put("iso8859_3", "ISO-8859-3");
972                java2mime.put("iso8859-3", "ISO-8859-3");
973    
974                java2mime.put("8859_4", "ISO-8859-4");
975                java2mime.put("iso8859_4", "ISO-8859-4");
976                java2mime.put("iso8859-4", "ISO-8859-4");
977    
978                java2mime.put("8859_5", "ISO-8859-5");
979                java2mime.put("iso8859_5", "ISO-8859-5");
980                java2mime.put("iso8859-5", "ISO-8859-5");
981    
982                java2mime.put ("8859_6", "ISO-8859-6");
983                java2mime.put("iso8859_6", "ISO-8859-6");
984                java2mime.put("iso8859-6", "ISO-8859-6");
985    
986                java2mime.put("8859_7", "ISO-8859-7");
987                java2mime.put("iso8859_7", "ISO-8859-7");
988                java2mime.put("iso8859-7", "ISO-8859-7");
989    
990                java2mime.put("8859_8", "ISO-8859-8");
991                java2mime.put("iso8859_8", "ISO-8859-8");
992                java2mime.put("iso8859-8", "ISO-8859-8");
993    
994                java2mime.put("8859_9", "ISO-8859-9");
995                java2mime.put("iso8859_9", "ISO-8859-9");
996                java2mime.put("iso8859-9", "ISO-8859-9");
997    
998                java2mime.put("sjis", "Shift_JIS");
999                java2mime.put ("jis", "ISO-2022-JP");
1000                java2mime.put("iso2022jp", "ISO-2022-JP");
1001                java2mime.put("euc_jp", "euc-jp");
1002                java2mime.put("koi8_r", "koi8-r");
1003                java2mime.put("euc_cn", "euc-cn");
1004                java2mime.put("euc_tw", "euc-tw");
1005                java2mime.put("euc_kr", "euc-kr");
1006            }
1007    
1008            if (mime2java.isEmpty ()) {
1009                mime2java.put("iso-2022-cn", "ISO2022CN");
1010                mime2java.put("iso-2022-kr", "ISO2022KR");
1011                mime2java.put("utf-8", "UTF8");
1012                mime2java.put("utf8", "UTF8");
1013                mime2java.put("ja_jp.iso2022-7", "ISO2022JP");
1014                mime2java.put("ja_jp.eucjp", "EUCJIS");
1015                mime2java.put ("euc-kr", "KSC5601");
1016                mime2java.put("euckr", "KSC5601");
1017                mime2java.put("us-ascii", "ISO-8859-1");
1018                mime2java.put("x-us-ascii", "ISO-8859-1");
1019            }
1020        }
1021    
1022    
1023        /**
1024         * Read a section of a character map table and populate the
1025         * target mapping table with the information.  The table end
1026         * is marked by a line starting with "--" and also ending with
1027         * "--".  Blank lines and comment lines (beginning with '#') are
1028         * ignored.
1029         *
1030         * @param reader The source of the file information.
1031         * @param table  The mapping table used to store the information.
1032         */
1033        static private void readMappings(BufferedReader reader, Map table) throws IOException {
1034            // process lines to the EOF or the end of table marker.
1035            while (true) {
1036                String line = reader.readLine();
1037                // no line returned is an EOF
1038                if (line == null) {
1039                    return;
1040                }
1041    
1042                // trim so we're not messed up by trailing blanks
1043                line = line.trim();
1044    
1045                if (line.length() == 0 || line.startsWith("#")) {
1046                    continue;
1047                }
1048    
1049                // stop processing if this is the end-of-table marker.
1050                if (line.startsWith("--") && line.endsWith("--")) {
1051                    return;
1052                }
1053    
1054                // we allow either blanks or tabs as token delimiters.
1055                StringTokenizer tokenizer = new StringTokenizer(line, " \t");
1056    
1057                try {
1058                    String from = tokenizer.nextToken().toLowerCase();
1059                    String to = tokenizer.nextToken();
1060    
1061                    table.put(from, to);
1062                } catch (NoSuchElementException e) {
1063                    // just ignore the line if invalid.
1064                }
1065            }
1066        }
1067    
1068    
1069        /**
1070         * Perform RFC 2047 text folding on a string of text.
1071         *
1072         * @param used   The amount of text already "used up" on this line.  This is
1073         *               typically the length of a message header that this text
1074         *               get getting added to.
1075         * @param s      The text to fold.
1076         *
1077         * @return The input text, with linebreaks inserted at appropriate fold points.
1078         */
1079        public static String fold(int used, String s) {
1080            // if folding is disable, unfolding is also.  Return the string unchanged.
1081            if (!SessionUtil.getBooleanProperty(MIME_FOLDTEXT, true)) {
1082                return s;
1083            }
1084    
1085            int end;
1086    
1087            // now we need to strip off any trailing "whitespace", where whitespace is blanks, tabs,
1088            // and line break characters.
1089            for (end = s.length() - 1; end >= 0; end--) {
1090                int ch = s.charAt(end);
1091                if (ch != ' ' && ch != '\t' ) {
1092                    break;
1093                }
1094            }
1095    
1096            // did we actually find something to remove?  Shorten the String to the trimmed length
1097            if (end != s.length() - 1) {
1098                s = s.substring(0, end + 1);
1099            }
1100    
1101            // does the string as it exists now not require folding?  We can just had that back right off.
1102            if (s.length() + used <= FOLD_THRESHOLD) {
1103                return s;
1104            }
1105    
1106            // get a buffer for the length of the string, plus room for a few line breaks.
1107            // these are soft line breaks, so we generally need more that just the line breaks (an escape +
1108            // CR + LF + leading space on next line);
1109            StringBuffer newString = new StringBuffer(s.length() + 8);
1110    
1111    
1112            // now keep chopping this down until we've accomplished what we need.
1113            while (used + s.length() > FOLD_THRESHOLD) {
1114                int breakPoint = -1;
1115                char breakChar = 0;
1116    
1117                // now scan for the next place where we can break.
1118                for (int i = 0; i < s.length(); i++) {
1119                    // have we passed the fold limit?
1120                    if (used + i > FOLD_THRESHOLD) {
1121                        // if we've already seen a blank, then stop now.  Otherwise
1122                        // we keep going until we hit a fold point.
1123                        if (breakPoint != -1) {
1124                            break;
1125                        }
1126                    }
1127                    char ch = s.charAt(i);
1128    
1129                    // a white space character?
1130                    if (ch == ' ' || ch == '\t') {
1131                        // this might be a run of white space, so skip over those now.
1132                        breakPoint = i;
1133                        // we need to maintain the same character type after the inserted linebreak.
1134                        breakChar = ch;
1135                        i++;
1136                        while (i < s.length()) {
1137                            ch = s.charAt(i);
1138                            if (ch != ' ' && ch != '\t') {
1139                                break;
1140                            }
1141                            i++;
1142                        }
1143                    }
1144                    // found an embedded new line.  Escape this so that the unfolding process preserves it.
1145                    else if (ch == '\n') {
1146                        newString.append('\\');
1147                        newString.append('\n');
1148                    }
1149                    else if (ch == '\r') {
1150                        newString.append('\\');
1151                        newString.append('\n');
1152                        i++;
1153                        // if this is a CRLF pair, add the second char also
1154                        if (i < s.length() && s.charAt(i) == '\n') {
1155                            newString.append('\r');
1156                        }
1157                    }
1158    
1159                }
1160                // no fold point found, we punt, append the remainder and leave.
1161                if (breakPoint == -1) {
1162                    newString.append(s);
1163                    return newString.toString();
1164                }
1165                newString.append(s.substring(0, breakPoint));
1166                newString.append("\r\n");
1167                newString.append(breakChar);
1168                // chop the string
1169                s = s.substring(breakPoint + 1);
1170                // start again, and we've used the first char of the limit already with the whitespace char.
1171                used = 1;
1172            }
1173    
1174            // add on the remainder, and return
1175            newString.append(s);
1176            return newString.toString();
1177        }
1178    
1179        /**
1180         * Unfold a folded string.  The unfolding process will remove
1181         * any line breaks that are not escaped and which are also followed
1182         * by whitespace characters.
1183         *
1184         * @param s      The folded string.
1185         *
1186         * @return A new string with unfolding rules applied.
1187         */
1188        public static String unfold(String s) {
1189            // if folding is disable, unfolding is also.  Return the string unchanged.
1190            if (!SessionUtil.getBooleanProperty(MIME_FOLDTEXT, true)) {
1191                return s;
1192            }
1193    
1194            // if there are no line break characters in the string, we can just return this.
1195            if (s.indexOf('\n') < 0 && s.indexOf('\r') < 0) {
1196                return s;
1197            }
1198    
1199            // we need to scan and fix things up.
1200            int length = s.length();
1201    
1202            StringBuffer newString = new StringBuffer(length);
1203    
1204            // scan the entire string
1205            for (int i = 0; i < length; i++) {
1206                char ch = s.charAt(i);
1207    
1208                // we have a backslash.  In folded strings, escape characters are only processed as such if
1209                // they preceed line breaks.  Otherwise, we leave it be.
1210                if (ch == '\\') {
1211                    // escape at the very end?  Just add the character.
1212                    if (i == length - 1) {
1213                        newString.append(ch);
1214                    }
1215                    else {
1216                        int nextChar = s.charAt(i + 1);
1217    
1218                        // naked newline?  Add the new line to the buffer, and skip the escape char.
1219                        if (nextChar == '\n') {
1220                            newString.append('\n');
1221                            i++;
1222                        }
1223                        else if (nextChar == '\r') {
1224                            // just the CR left?  Add it, removing the escape.
1225                            if (i == length - 2 || s.charAt(i + 2) != '\r') {
1226                                newString.append('\r');
1227                                i++;
1228                            }
1229                            else {
1230                                // toss the escape, add both parts of the CRLF, and skip over two chars.
1231                                newString.append('\r');
1232                                newString.append('\n');
1233                                i += 2;
1234                            }
1235                        }
1236                        else {
1237                            // an escape for another purpose, just copy it over.
1238                            newString.append(ch);
1239                        }
1240                    }
1241                }
1242                // we have an unescaped line break
1243                else if (ch == '\n' || ch == '\r') {
1244                    // remember the position in case we need to backtrack.
1245                    int lineBreak = i;
1246                    boolean CRLF = false;
1247    
1248                    if (ch == '\r') {
1249                        // check to see if we need to step over this.
1250                        if (i < length - 1 && s.charAt(i + 1) == '\n') {
1251                            i++;
1252                            // flag the type so we know what we might need to preserve.
1253                            CRLF = true;
1254                        }
1255                    }
1256    
1257                    // get a temp position scanner.
1258                    int scan = i + 1;
1259    
1260                    // does a blank follow this new line?  we need to scrap the new line and reduce the leading blanks
1261                    // down to a single blank.
1262                    if (scan < length && s.charAt(scan) == ' ') {
1263                        // add the character
1264                        newString.append(' ');
1265    
1266                        // scan over the rest of the blanks
1267                        i = scan + 1;
1268                        while (i < length && s.charAt(i) == ' ') {
1269                            i++;
1270                        }
1271                        // we'll increment down below, so back up to the last blank as the current char.
1272                        i--;
1273                    }
1274                    else {
1275                        // we must keep this line break.  Append the appropriate style.
1276                        if (CRLF) {
1277                            newString.append("\r\n");
1278                        }
1279                        else {
1280                            newString.append(ch);
1281                        }
1282                    }
1283                }
1284                else {
1285                    // just a normal, ordinary character
1286                    newString.append(ch);
1287                }
1288            }
1289            return newString.toString();
1290        }
1291    }
1292    
1293    
1294    /**
1295     * Utility class for examining content information written out
1296     * by a DataHandler object.  This stream gathers statistics on
1297     * the stream so it can make transfer encoding determinations.
1298     */
1299    class ContentCheckingOutputStream extends OutputStream {
1300        private int asciiChars = 0;
1301        private int nonAsciiChars = 0;
1302        private boolean containsLongLines = false;
1303        private boolean containsMalformedEOL = false;
1304        private int previousChar = 0;
1305        private int span = 0;
1306    
1307        ContentCheckingOutputStream() {
1308        }
1309    
1310        public void write(byte[] data) throws IOException {
1311            write(data, 0, data.length);
1312        }
1313    
1314        public void write(byte[] data, int offset, int length) throws IOException {
1315            for (int i = 0; i < length; i++) {
1316                write(data[offset + i]);
1317            }
1318        }
1319    
1320        public void write(int ch) {
1321            // we found a linebreak.  Reset the line length counters on either one.  We don't
1322            // really need to validate here.
1323            if (ch == '\n' || ch == '\r') {
1324                // we found a newline, this is only valid if the previous char was the '\r'
1325                if (ch == '\n') {
1326                    // malformed linebreak?  force this to base64 encoding.
1327                    if (previousChar != '\r') {
1328                        containsMalformedEOL = true;
1329                    }
1330                }
1331                // hit a line end, reset our line length counter
1332                span = 0;
1333            }
1334            else {
1335                span++;
1336                // the text has long lines, we can't transfer this as unencoded text.
1337                if (span > 998) {
1338                    containsLongLines = true;
1339                }
1340    
1341                // non-ascii character, we have to transfer this in binary.
1342                if (!ASCIIUtil.isAscii(ch)) {
1343                    nonAsciiChars++;
1344                }
1345                else {
1346                    asciiChars++;
1347                }
1348            }
1349            previousChar = ch;
1350        }
1351    
1352    
1353        public String getBinaryTransferEncoding() {
1354            if (nonAsciiChars != 0 || containsLongLines || containsMalformedEOL) {
1355                return "base64";
1356            }
1357            else {
1358                return "7bit";
1359            }
1360        }
1361    
1362        public String getTextTransferEncoding() {
1363            // looking good so far, only valid chars here.
1364            if (nonAsciiChars == 0) {
1365                // does this contain long text lines?  We need to use a Q-P encoding which will
1366                // be only slightly longer, but handles folding the longer lines.
1367                if (containsLongLines) {
1368                    return "quoted-printable";
1369                }
1370                else {
1371                    // ideal!  Easiest one to handle.
1372                    return "7bit";
1373                }
1374            }
1375            else {
1376                // mostly characters requiring encoding?  Base64 is our best bet.
1377                if (nonAsciiChars > asciiChars) {
1378                    return "base64";
1379                }
1380                else {
1381                    // Q-P encoding will use fewer bytes than the full Base64.
1382                    return "quoted-printable";
1383                }
1384            }
1385        }
1386    }