001 /**
002 *
003 * Copyright 2003-2006 The Apache Software Foundation
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package javax.mail.internet;
019
020 import java.io.BufferedInputStream;
021 import java.io.BufferedReader;
022 import java.io.ByteArrayInputStream;
023 import java.io.ByteArrayOutputStream;
024 import java.io.IOException;
025 import java.io.InputStream;
026 import java.io.InputStreamReader;
027 import java.io.OutputStream;
028 import java.io.UnsupportedEncodingException;
029 import java.util.HashMap;
030 import java.util.Map;
031 import java.util.NoSuchElementException;
032 import java.util.StringTokenizer;
033
034 import javax.activation.DataHandler;
035 import javax.activation.DataSource;
036 import javax.mail.MessagingException;
037
038 import org.apache.geronimo.mail.util.ASCIIUtil;
039 import org.apache.geronimo.mail.util.Base64;
040 import org.apache.geronimo.mail.util.Base64DecoderStream;
041 import org.apache.geronimo.mail.util.Base64Encoder;
042 import org.apache.geronimo.mail.util.Base64EncoderStream;
043 import org.apache.geronimo.mail.util.QuotedPrintableDecoderStream;
044 import org.apache.geronimo.mail.util.QuotedPrintableEncoderStream;
045 import org.apache.geronimo.mail.util.QuotedPrintableEncoder;
046 import org.apache.geronimo.mail.util.QuotedPrintable;
047 import org.apache.geronimo.mail.util.SessionUtil;
048 import org.apache.geronimo.mail.util.UUDecoderStream;
049 import org.apache.geronimo.mail.util.UUEncoderStream;
050
// Supported transfer encodings include "base64", "quoted-printable", "7bit", "8bit" and "binary".
// In addition, "uuencode" (also accepted as "x-uuencode" and "x-uue") is supported.
053
054 /**
055 * @version $Rev: 421852 $ $Date: 2006-07-14 03:02:19 -0700 (Fri, 14 Jul 2006) $
056 */
057 public class MimeUtility {
058
/** Property read (default false) when encoding words; its value is handed to the encoders as the fold flag. */
private static final String MIME_FOLDENCODEDWORDS = "mail.mime.foldencodedwords";
/** Property read (default true) by decodeText(); when false the non-strict decoder is used. */
private static final String MIME_DECODE_TEXT_STRICT = "mail.mime.decodetext.strict";
/** Property read (default true) by fold(); when false the text is returned unchanged. */
private static final String MIME_FOLDTEXT = "mail.mime.foldtext";
/** fold() returns its input untouched when the used length plus the text length does not exceed this. */
private static final int FOLD_THRESHOLD = 76;

/**
 * This class only exposes static helpers, so instances are never created.
 */
private MimeUtility() {
}

// NOTE(review): no use of ALL is visible in this part of the file; kept as part of the public API.
public static final int ALL = -1;

// NOTE(review): not referenced in the visible part of the file.
private static String defaultJavaCharset;
// characters that quoteAndEscapeString() prefixes with a backslash.
private static final String escapedChars = "\"\\\r\n";
// characters treated as token delimiters when decoding header text.
private static final String linearWhiteSpace = " \t\r\n";

// characters handed to the quoted-printable encoder as "specials" when encoding a word...
private static final String QP_WORD_SPECIALS = "=_?\"#$%&'(),.:;<>@[\\]^`{|}~";
// ...and the smaller set used when encoding free text.
private static final String QP_TEXT_SPECIALS = "=_?";

// the javamail spec includes the ability to map java encoding names to MIME-specified names. Normally,
// these values are loaded from a character mapping file. Both tables are (re)created by
// loadCharacterSetMappings(), which is why they are not final.
private static Map java2mime;
private static Map mime2java;

static {
    // we need to load the mapping tables used by javaCharset() and mimeCharset().
    loadCharacterSetMappings();
}
085
086 public static InputStream decode(InputStream in, String encoding) throws MessagingException {
087 encoding = encoding.toLowerCase();
088
089 // some encodies are just pass-throughs, with no real decoding.
090 if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
091 return in;
092 }
093 else if (encoding.equals("base64")) {
094 return new Base64DecoderStream(in);
095 }
096 // UUEncode is known by a couple historical extension names too.
097 else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
098 return new UUDecoderStream(in);
099 }
100 else if (encoding.equals("quoted-printable")) {
101 return new QuotedPrintableDecoderStream(in);
102 }
103 else {
104 throw new MessagingException("Unknown encoding " + encoding);
105 }
106 }
107
108 /**
109 * Decode a string of text obtained from a mail header into
110 * it's proper form. The text generally will consist of a
111 * string of tokens, some of which may be encoded using
112 * base64 encoding.
113 *
114 * @param text The text to decode.
115 *
116 * @return The decoded test string.
117 * @exception UnsupportedEncodingException
118 */
119 public static String decodeText(String text) throws UnsupportedEncodingException {
120 // if the text contains any encoded tokens, those tokens will be marked with "=?". If the
121 // source string doesn't contain that sequent, no decoding is required.
122 if (text.indexOf("=?") < 0) {
123 return text;
124 }
125
126 // we have two sets of rules we can apply.
127 if (!SessionUtil.getBooleanProperty(MIME_DECODE_TEXT_STRICT, true)) {
128 return decodeTextNonStrict(text);
129 }
130
131 int offset = 0;
132 int endOffset = text.length();
133
134 int startWhiteSpace = -1;
135 int endWhiteSpace = -1;
136
137 StringBuffer decodedText = new StringBuffer(text.length());
138
139 boolean previousTokenEncoded = false;
140
141 while (offset < endOffset) {
142 char ch = text.charAt(offset);
143
144 // is this a whitespace character?
145 if (linearWhiteSpace.indexOf(ch) != -1) {
146 startWhiteSpace = offset;
147 while (offset < endOffset) {
148 // step over the white space characters.
149 ch = text.charAt(offset);
150 if (linearWhiteSpace.indexOf(ch) != -1) {
151 offset++;
152 }
153 else {
154 // record the location of the first non lwsp and drop down to process the
155 // token characters.
156 endWhiteSpace = offset;
157 break;
158 }
159 }
160 }
161 else {
162 // we have a word token. We need to scan over the word and then try to parse it.
163 int wordStart = offset;
164
165 while (offset < endOffset) {
166 // step over the white space characters.
167 ch = text.charAt(offset);
168 if (linearWhiteSpace.indexOf(ch) == -1) {
169 offset++;
170 }
171 else {
172 break;
173 }
174
175 //NB: Trailing whitespace on these header strings will just be discarded.
176 }
177 // pull out the word token.
178 String word = text.substring(wordStart, offset);
179 // is the token encoded? decode the word
180 if (word.startsWith("=?")) {
181 try {
182 // if this gives a parsing failure, treat it like a non-encoded word.
183 String decodedWord = decodeWord(word);
184
185 // are any whitespace characters significant? Append 'em if we've got 'em.
186 if (!previousTokenEncoded) {
187 if (startWhiteSpace != -1) {
188 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
189 startWhiteSpace = -1;
190 }
191 }
192 // this is definitely a decoded token.
193 previousTokenEncoded = true;
194 // and add this to the text.
195 decodedText.append(decodedWord);
196 // we continue parsing from here...we allow parsing errors to fall through
197 // and get handled as normal text.
198 continue;
199
200 } catch (ParseException e) {
201 }
202 }
203 // this is a normal token, so it doesn't matter what the previous token was. Add the white space
204 // if we have it.
205 if (startWhiteSpace != -1) {
206 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
207 startWhiteSpace = -1;
208 }
209 // this is not a decoded token.
210 previousTokenEncoded = false;
211 decodedText.append(word);
212 }
213 }
214
215 return decodedText.toString();
216 }
217
218
219 /**
220 * Decode a string of text obtained from a mail header into
221 * it's proper form. The text generally will consist of a
222 * string of tokens, some of which may be encoded using
223 * base64 encoding. This is for non-strict decoded for mailers that
224 * violate the RFC 2047 restriction that decoded tokens must be delimited
225 * by linear white space. This will scan tokens looking for inner tokens
226 * enclosed in "=?" -- "?=" pairs.
227 *
228 * @param text The text to decode.
229 *
230 * @return The decoded test string.
231 * @exception UnsupportedEncodingException
232 */
233 private static String decodeTextNonStrict(String text) throws UnsupportedEncodingException {
234 int offset = 0;
235 int endOffset = text.length();
236
237 int startWhiteSpace = -1;
238 int endWhiteSpace = -1;
239
240 StringBuffer decodedText = new StringBuffer(text.length());
241
242 boolean previousTokenEncoded = false;
243
244 while (offset < endOffset) {
245 char ch = text.charAt(offset);
246
247 // is this a whitespace character?
248 if (linearWhiteSpace.indexOf(ch) != -1) {
249 startWhiteSpace = offset;
250 while (offset < endOffset) {
251 // step over the white space characters.
252 ch = text.charAt(offset);
253 if (linearWhiteSpace.indexOf(ch) != -1) {
254 offset++;
255 }
256 else {
257 // record the location of the first non lwsp and drop down to process the
258 // token characters.
259 endWhiteSpace = offset;
260 break;
261 }
262 }
263 }
264 else {
265 // we're at the start of a word token. We potentially need to break this up into subtokens
266 int wordStart = offset;
267
268 while (offset < endOffset) {
269 // step over the white space characters.
270 ch = text.charAt(offset);
271 if (linearWhiteSpace.indexOf(ch) == -1) {
272 offset++;
273 }
274 else {
275 break;
276 }
277
278 //NB: Trailing whitespace on these header strings will just be discarded.
279 }
280 // pull out the word token.
281 String word = text.substring(wordStart, offset);
282
283 int decodeStart = 0;
284
285 // now scan and process each of the bits within here.
286 while (decodeStart < word.length()) {
287 int tokenStart = word.indexOf("=?", decodeStart);
288 if (tokenStart == -1) {
289 // this is a normal token, so it doesn't matter what the previous token was. Add the white space
290 // if we have it.
291 if (startWhiteSpace != -1) {
292 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
293 startWhiteSpace = -1;
294 }
295 // this is not a decoded token.
296 previousTokenEncoded = false;
297 decodedText.append(word.substring(decodeStart));
298 // we're finished.
299 break;
300 }
301 // we have something to process
302 else {
303 // we might have a normal token preceeding this.
304 if (tokenStart != decodeStart) {
305 // this is a normal token, so it doesn't matter what the previous token was. Add the white space
306 // if we have it.
307 if (startWhiteSpace != -1) {
308 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
309 startWhiteSpace = -1;
310 }
311 // this is not a decoded token.
312 previousTokenEncoded = false;
313 decodedText.append(word.substring(decodeStart, tokenStart));
314 }
315
316 // now find the end marker.
317 int tokenEnd = word.indexOf("?=", tokenStart);
318 // sigh, an invalid token. Treat this as plain text.
319 if (tokenEnd == -1) {
320 // this is a normal token, so it doesn't matter what the previous token was. Add the white space
321 // if we have it.
322 if (startWhiteSpace != -1) {
323 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
324 startWhiteSpace = -1;
325 }
326 // this is not a decoded token.
327 previousTokenEncoded = false;
328 decodedText.append(word.substring(tokenStart));
329 // we're finished.
330 break;
331 }
332 else {
333 // update our ticker
334 decodeStart = tokenEnd + 2;
335
336 String token = word.substring(tokenStart, tokenEnd);
337 try {
338 // if this gives a parsing failure, treat it like a non-encoded word.
339 String decodedWord = decodeWord(token);
340
341 // are any whitespace characters significant? Append 'em if we've got 'em.
342 if (!previousTokenEncoded) {
343 if (startWhiteSpace != -1) {
344 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
345 startWhiteSpace = -1;
346 }
347 }
348 // this is definitely a decoded token.
349 previousTokenEncoded = true;
350 // and add this to the text.
351 decodedText.append(decodedWord);
352 // we continue parsing from here...we allow parsing errors to fall through
353 // and get handled as normal text.
354 continue;
355
356 } catch (ParseException e) {
357 }
358 // this is a normal token, so it doesn't matter what the previous token was. Add the white space
359 // if we have it.
360 if (startWhiteSpace != -1) {
361 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
362 startWhiteSpace = -1;
363 }
364 // this is not a decoded token.
365 previousTokenEncoded = false;
366 decodedText.append(token);
367 }
368 }
369 }
370 }
371 }
372
373 return decodedText.toString();
374 }
375
376 /**
377 * Parse a string using the RFC 2047 rules for an "encoded-word"
378 * type. This encoding has the syntax:
379 *
380 * encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
381 *
382 * @param word The possibly encoded word value.
383 *
384 * @return The decoded word.
385 * @exception ParseException
386 * @exception UnsupportedEncodingException
387 */
388 public static String decodeWord(String word) throws ParseException, UnsupportedEncodingException {
389 // encoded words start with the characters "=?". If this not an encoded word, we throw a
390 // ParseException for the caller.
391
392 if (!word.startsWith("=?")) {
393 throw new ParseException("Invalid RFC 2047 encoded-word: " + word);
394 }
395
396 int charsetPos = word.indexOf('?', 2);
397 if (charsetPos == -1) {
398 throw new ParseException("Missing charset in RFC 2047 encoded-word: " + word);
399 }
400
401 // pull out the character set information (this is the MIME name at this point).
402 String charset = word.substring(2, charsetPos).toLowerCase();
403
404 // now pull out the encoding token the same way.
405 int encodingPos = word.indexOf('?', charsetPos + 1);
406 if (encodingPos == -1) {
407 throw new ParseException("Missing encoding in RFC 2047 encoded-word: " + word);
408 }
409
410 String encoding = word.substring(charsetPos + 1, encodingPos);
411
412 // and finally the encoded text.
413 int encodedTextPos = word.indexOf("?=", encodingPos + 1);
414 if (encodedTextPos == -1) {
415 throw new ParseException("Missing encoded text in RFC 2047 encoded-word: " + word);
416 }
417
418 String encodedText = word.substring(encodingPos + 1, encodedTextPos);
419
420 // seems a bit silly to encode a null string, but easy to deal with.
421 if (encodedText.length() == 0) {
422 return "";
423 }
424
425 try {
426 // the decoder writes directly to an output stream.
427 ByteArrayOutputStream out = new ByteArrayOutputStream(encodedText.length());
428
429 byte[] encodedData = encodedText.getBytes("US-ASCII");
430
431 // Base64 encoded?
432 if (encoding.equals("B")) {
433 Base64.decode(encodedData, out);
434 }
435 // maybe quoted printable.
436 else if (encoding.equals("Q")) {
437 QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder();
438 dataEncoder.decodeWord(encodedData, out);
439 }
440 else {
441 throw new UnsupportedEncodingException("Unknown RFC 2047 encoding: " + encoding);
442 }
443 // get the decoded byte data and convert into a string.
444 byte[] decodedData = out.toByteArray();
445 return new String(decodedData, javaCharset(charset));
446 } catch (IOException e) {
447 throw new UnsupportedEncodingException("Invalid RFC 2047 encoding");
448 }
449
450 }
451
452 /**
453 * Wrap an encoder around a given output stream.
454 *
455 * @param out The output stream to wrap.
456 * @param encoding The name of the encoding.
457 *
458 * @return A instance of FilterOutputStream that manages on the fly
459 * encoding for the requested encoding type.
460 * @exception MessagingException
461 */
462 public static OutputStream encode(OutputStream out, String encoding) throws MessagingException {
463 // no encoding specified, so assume it goes out unchanged.
464 if (encoding == null) {
465 return out;
466 }
467
468 encoding = encoding.toLowerCase();
469
470 // some encodies are just pass-throughs, with no real decoding.
471 if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
472 return out;
473 }
474 else if (encoding.equals("base64")) {
475 return new Base64EncoderStream(out);
476 }
477 // UUEncode is known by a couple historical extension names too.
478 else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
479 return new UUEncoderStream(out);
480 }
481 else if (encoding.equals("quoted-printable")) {
482 return new QuotedPrintableEncoderStream(out);
483 }
484 else {
485 throw new MessagingException("Unknown encoding " + encoding);
486 }
487 }
488
489 /**
490 * Wrap an encoder around a given output stream.
491 *
492 * @param out The output stream to wrap.
493 * @param encoding The name of the encoding.
494 * @param filename The filename of the data being sent (only used for UUEncode).
495 *
496 * @return A instance of FilterOutputStream that manages on the fly
497 * encoding for the requested encoding type.
498 * @exception MessagingException
499 */
500 public static OutputStream encode(OutputStream out, String encoding, String filename) throws MessagingException {
501 encoding = encoding.toLowerCase();
502
503 // some encodies are just pass-throughs, with no real decoding.
504 if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
505 return out;
506 }
507 else if (encoding.equals("base64")) {
508 return new Base64EncoderStream(out);
509 }
510 // UUEncode is known by a couple historical extension names too.
511 else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
512 return new UUEncoderStream(out, filename);
513 }
514 else if (encoding.equals("quoted-printable")) {
515 return new QuotedPrintableEncoderStream(out);
516 }
517 else {
518 throw new MessagingException("Unknown encoding " + encoding);
519 }
520 }
521
522
523 public static String encodeText(String word) throws UnsupportedEncodingException {
524 return encodeText(word, null, null);
525 }
526
527 public static String encodeText(String word, String charset, String encoding) throws UnsupportedEncodingException {
528 return encodeWord(word, charset, encoding, false);
529 }
530
531 public static String encodeWord(String word) throws UnsupportedEncodingException {
532 return encodeWord(word, null, null);
533 }
534
535 public static String encodeWord(String word, String charset, String encoding) throws UnsupportedEncodingException {
536 return encodeWord(word, charset, encoding, true);
537 }
538
539
540 private static String encodeWord(String word, String charset, String encoding, boolean encodingWord) throws UnsupportedEncodingException {
541
542 // figure out what we need to encode this.
543 String encoder = ASCIIUtil.getTextTransferEncoding(word);
544 // all ascii? We can return this directly,
545 if (encoder.equals("7bit")) {
546 return word;
547 }
548
549 // if not given a charset, use the default.
550 if (charset == null) {
551 charset = getDefaultMIMECharset();
552 }
553
554 // sort out the encoder. If not explicitly given, use the best guess we've already established.
555 if (encoding != null) {
556 if (encoding.equalsIgnoreCase("B")) {
557 encoder = "base64";
558 }
559 else if (encoding.equalsIgnoreCase("Q")) {
560 encoder = "quoted-printable";
561 }
562 else {
563 throw new UnsupportedEncodingException("Unknown transfer encoding: " + encoding);
564 }
565 }
566
567 try {
568 // get the string bytes in the correct source charset
569 InputStream in = new ByteArrayInputStream(word.getBytes( javaCharset(charset)));
570 ByteArrayOutputStream out = new ByteArrayOutputStream();
571
572 if (encoder.equals("base64")) {
573 Base64Encoder dataEncoder = new Base64Encoder();
574 dataEncoder.encodeWord(in, charset, out, SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false));
575 }
576 else {
577 QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder();
578 dataEncoder.encodeWord(in, charset, encodingWord ? QP_WORD_SPECIALS : QP_TEXT_SPECIALS, out, SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false));
579 }
580
581 byte[] bytes = out.toByteArray();
582 return new String(bytes);
583 } catch (IOException e) {
584 throw new UnsupportedEncodingException("Invalid encoding");
585 }
586 }
587
588
589 /**
590 * Examine the content of a data source and decide what type
591 * of transfer encoding should be used. For text streams,
592 * we'll decided between 7bit, quoted-printable, and base64.
593 * For binary content types, we'll use either 7bit or base64.
594 *
595 * @param handler The DataHandler associated with the content.
596 *
597 * @return The string name of an encoding used to transfer the content.
598 */
599 public static String getEncoding(DataHandler handler) {
600
601
602 // if this handler has an associated data source, we can read directly from the
603 // data source to make this judgment. This is generally MUCH faster than asking the
604 // DataHandler to write out the data for us.
605 DataSource ds = handler.getDataSource();
606 if (ds != null) {
607 return getEncoding(ds);
608 }
609
610 try {
611 // get a parser that allows us to make comparisons.
612 ContentType content = new ContentType(ds.getContentType());
613
614 // The only access to the content bytes at this point is by asking the handler to write
615 // the information out to a stream. We're going to pipe this through a special stream
616 // that examines the bytes as they go by.
617 ContentCheckingOutputStream checker = new ContentCheckingOutputStream();
618
619 handler.writeTo(checker);
620
621 // figure this out based on whether we believe this to be a text type or not.
622 if (content.match("text/*")) {
623 return checker.getTextTransferEncoding();
624 }
625 else {
626 return checker.getBinaryTransferEncoding();
627 }
628
629 } catch (Exception e) {
630 // any unexpected I/O exceptions we'll force to a "safe" fallback position.
631 return "base64";
632 }
633 }
634
635
636 /**
637 * Determine the what transfer encoding should be used for
638 * data retrieved from a DataSource.
639 *
640 * @param source The DataSource for the transmitted data.
641 *
642 * @return The string name of the encoding form that should be used for
643 * the data.
644 */
645 public static String getEncoding(DataSource source) {
646 InputStream in = null;
647
648 try {
649 // get a parser that allows us to make comparisons.
650 ContentType content = new ContentType(source.getContentType());
651
652 // we're probably going to have to scan the data.
653 in = source.getInputStream();
654
655 if (!content.match("text/*")) {
656 // Not purporting to be a text type? Examine the content to see we might be able to
657 // at least pretend it is an ascii type.
658 return ASCIIUtil.getBinaryTransferEncoding(in);
659 }
660 else {
661 return ASCIIUtil.getTextTransferEncoding(in);
662 }
663 } catch (Exception e) {
664 // this was a problem...not sure what makes sense here, so we'll assume it's binary
665 // and we need to transfer this using Base64 encoding.
666 return "base64";
667 } finally {
668 // make sure we close the stream
669 try {
670 if (in != null) {
671 in.close();
672 }
673 } catch (IOException e) {
674 }
675 }
676 }
677
678
679 /**
680 * Quote a "word" value. If the word contains any character from
681 * the specified "specials" list, this value is returned as a
682 * quoted strong. Otherwise, it is returned unchanged (an "atom").
683 *
684 * @param word The word requiring quoting.
685 * @param specials The set of special characters that can't appear in an unquoted
686 * string.
687 *
688 * @return The quoted value. This will be unchanged if the word doesn't contain
689 * any of the designated special characters.
690 */
691 public static String quote(String word, String specials) {
692 int wordLength = word.length();
693 boolean requiresQuoting = false;
694 // scan the string looking for problem characters
695 for (int i =0; i < wordLength; i++) {
696 char ch = word.charAt(i);
697 // special escaped characters require escaping, which also implies quoting.
698 if (escapedChars.indexOf(ch) >= 0) {
699 return quoteAndEscapeString(word);
700 }
701 // now check for control characters or the designated special characters.
702 if (ch < 32 || ch >= 127 || specials.indexOf(ch) >= 0) {
703 // we know this requires quoting, but we still need to scan the entire string to
704 // see if contains chars that require escaping. Just go ahead and treat it as if it does.
705 return quoteAndEscapeString(word);
706 }
707 }
708 return word;
709 }
710
711 /**
712 * Take a string and return it as a formatted quoted string, with
713 * all characters requiring escaping handled properly.
714 *
715 * @param word The string to quote.
716 *
717 * @return The quoted string.
718 */
719 private static String quoteAndEscapeString(String word) {
720 int wordLength = word.length();
721 // allocate at least enough for the string and two quotes plus a reasonable number of escaped chars.
722 StringBuffer buffer = new StringBuffer(wordLength + 10);
723 // add the leading quote.
724 buffer.append('"');
725
726 for (int i = 0; i < wordLength; i++) {
727 char ch = word.charAt(i);
728 // is this an escaped char?
729 if (escapedChars.indexOf(ch) >= 0) {
730 // add the escape marker before appending.
731 buffer.append('\\');
732 }
733 buffer.append(ch);
734 }
735 // now the closing quote
736 buffer.append('"');
737 return buffer.toString();
738 }
739
740 /**
741 * Translate a MIME standard character set name into the Java
742 * equivalent.
743 *
744 * @param charset The MIME standard name.
745 *
746 * @return The Java equivalent for this name.
747 */
748 public static String javaCharset(String charset) {
749 // nothing in, nothing out.
750 if (charset == null) {
751 return null;
752 }
753
754 String mappedCharset = (String)mime2java.get(charset.toLowerCase());
755 // if there is no mapping, then the original name is used. Many of the MIME character set
756 // names map directly back into Java. The reverse isn't necessarily true.
757 return mappedCharset == null ? charset : mappedCharset;
758 }
759
760 /**
761 * Map a Java character set name into the MIME equivalent.
762 *
763 * @param charset The java character set name.
764 *
765 * @return The MIME standard equivalent for this character set name.
766 */
767 public static String mimeCharset(String charset) {
768 // nothing in, nothing out.
769 if (charset == null) {
770 return null;
771 }
772
773 String mappedCharset = (String)java2mime.get(charset.toLowerCase());
774 // if there is no mapping, then the original name is used. Many of the MIME character set
775 // names map directly back into Java. The reverse isn't necessarily true.
776 return mappedCharset == null ? charset : mappedCharset;
777 }
778
779
780 /**
781 * Get the default character set to use, in Java name format.
782 * This either be the value set with the mail.mime.charset
783 * system property or obtained from the file.encoding system
784 * property. If neither of these is set, we fall back to
785 * 8859_1 (basically US-ASCII).
786 *
787 * @return The character string value of the default character set.
788 */
789 public static String getDefaultJavaCharset() {
790 String charset = SessionUtil.getProperty("mail.mime.charset");
791 if (charset != null) {
792 return javaCharset(charset);
793 }
794 return SessionUtil.getProperty("file.encoding", "8859_1");
795 }
796
797 /**
798 * Get the default character set to use, in MIME name format.
799 * This either be the value set with the mail.mime.charset
800 * system property or obtained from the file.encoding system
801 * property. If neither of these is set, we fall back to
802 * 8859_1 (basically US-ASCII).
803 *
804 * @return The character string value of the default character set.
805 */
806 static String getDefaultMIMECharset() {
807 // if the property is specified, this can be used directly.
808 String charset = SessionUtil.getProperty("mail.mime.charset");
809 if (charset != null) {
810 return charset;
811 }
812
813 // get the Java-defined default and map back to a MIME name.
814 return mimeCharset(SessionUtil.getProperty("file.encoding", "8859_1"));
815 }
816
817
818 /**
819 * Load the default mapping tables used by the javaCharset()
820 * and mimeCharset() methods. By default, these tables are
821 * loaded from the /META-INF/javamail.charset.map file. If
822 * something goes wrong loading that file, we configure things
823 * with a default mapping table (which just happens to mimic
824 * what's in the default mapping file).
825 */
826 static private void loadCharacterSetMappings() {
827 java2mime = new HashMap();
828 mime2java = new HashMap();
829
830
831 // normally, these come from a character map file contained in the jar file.
832 try {
833 InputStream map = javax.mail.internet.MimeUtility.class.getResourceAsStream("/META-INF/javamail.charset.map");
834
835 if (map != null) {
836 // get a reader for this so we can load.
837 BufferedReader reader = new BufferedReader(new InputStreamReader(map));
838
839 readMappings(reader, java2mime);
840 readMappings(reader, mime2java);
841 }
842 } catch (Exception e) {
843 }
844
845 // if any sort of error occurred reading the preferred file version, we could end up with empty
846 // mapping tables. This could cause all sorts of difficulty, so ensure they are populated with at
847 // least a reasonable set of defaults.
848
849 // these mappings echo what's in the default file.
850 if (java2mime.isEmpty()) {
851 java2mime.put("8859_1", "ISO-8859-1");
852 java2mime.put("iso8859_1", "ISO-8859-1");
853 java2mime.put("iso8859-1", "ISO-8859-1");
854
855 java2mime.put("8859_2", "ISO-8859-2");
856 java2mime.put("iso8859_2", "ISO-8859-2");
857 java2mime.put("iso8859-2", "ISO-8859-2");
858
859 java2mime.put("8859_3", "ISO-8859-3");
860 java2mime.put("iso8859_3", "ISO-8859-3");
861 java2mime.put("iso8859-3", "ISO-8859-3");
862
863 java2mime.put("8859_4", "ISO-8859-4");
864 java2mime.put("iso8859_4", "ISO-8859-4");
865 java2mime.put("iso8859-4", "ISO-8859-4");
866
867 java2mime.put("8859_5", "ISO-8859-5");
868 java2mime.put("iso8859_5", "ISO-8859-5");
869 java2mime.put("iso8859-5", "ISO-8859-5");
870
871 java2mime.put ("8859_6", "ISO-8859-6");
872 java2mime.put("iso8859_6", "ISO-8859-6");
873 java2mime.put("iso8859-6", "ISO-8859-6");
874
875 java2mime.put("8859_7", "ISO-8859-7");
876 java2mime.put("iso8859_7", "ISO-8859-7");
877 java2mime.put("iso8859-7", "ISO-8859-7");
878
879 java2mime.put("8859_8", "ISO-8859-8");
880 java2mime.put("iso8859_8", "ISO-8859-8");
881 java2mime.put("iso8859-8", "ISO-8859-8");
882
883 java2mime.put("8859_9", "ISO-8859-9");
884 java2mime.put("iso8859_9", "ISO-8859-9");
885 java2mime.put("iso8859-9", "ISO-8859-9");
886
887 java2mime.put("sjis", "Shift_JIS");
888 java2mime.put ("jis", "ISO-2022-JP");
889 java2mime.put("iso2022jp", "ISO-2022-JP");
890 java2mime.put("euc_jp", "euc-jp");
891 java2mime.put("koi8_r", "koi8-r");
892 java2mime.put("euc_cn", "euc-cn");
893 java2mime.put("euc_tw", "euc-tw");
894 java2mime.put("euc_kr", "euc-kr");
895 }
896
897 if (mime2java.isEmpty ()) {
898 mime2java.put("iso-2022-cn", "ISO2022CN");
899 mime2java.put("iso-2022-kr", "ISO2022KR");
900 mime2java.put("utf-8", "UTF8");
901 mime2java.put("utf8", "UTF8");
902 mime2java.put("ja_jp.iso2022-7", "ISO2022JP");
903 mime2java.put("ja_jp.eucjp", "EUCJIS");
904 mime2java.put ("euc-kr", "KSC5601");
905 mime2java.put("euckr", "KSC5601");
906 mime2java.put("us-ascii", "ISO-8859-1");
907 mime2java.put("x-us-ascii", "ISO-8859-1");
908 }
909 }
910
911
912 /**
913 * Read a section of a character map table and populate the
914 * target mapping table with the information. The table end
915 * is marked by a line starting with "--" and also ending with
916 * "--". Blank lines and comment lines (beginning with '#') are
917 * ignored.
918 *
919 * @param reader The source of the file information.
920 * @param table The mapping table used to store the information.
921 */
922 static private void readMappings(BufferedReader reader, Map table) throws IOException {
923 // process lines to the EOF or the end of table marker.
924 while (true) {
925 String line = reader.readLine();
926 // no line returned is an EOF
927 if (line == null) {
928 return;
929 }
930
931 // trim so we're not messed up by trailing blanks
932 line = line.trim();
933
934 if (line.length() == 0 || line.startsWith("#")) {
935 continue;
936 }
937
938 // stop processing if this is the end-of-table marker.
939 if (line.startsWith("--") && line.endsWith("--")) {
940 return;
941 }
942
943 // we allow either blanks or tabs as token delimiters.
944 StringTokenizer tokenizer = new StringTokenizer(line, " \t");
945
946 try {
947 String from = tokenizer.nextToken().toLowerCase();
948 String to = tokenizer.nextToken();
949
950 table.put(from, to);
951 } catch (NoSuchElementException e) {
952 // just ignore the line if invalid.
953 }
954 }
955 }
956
957
958 /**
959 * Perform RFC 2047 text folding on a string of text.
960 *
961 * @param used The amount of text already "used up" on this line. This is
962 * typically the length of a message header that this text
963 * get getting added to.
964 * @param s The text to fold.
965 *
966 * @return The input text, with linebreaks inserted at appropriate fold points.
967 */
968 public static String fold(int used, String s) {
969 // if folding is disable, unfolding is also. Return the string unchanged.
970 if (!SessionUtil.getBooleanProperty(MIME_FOLDTEXT, true)) {
971 return s;
972 }
973
974 int end;
975
976 // now we need to strip off any trailing "whitespace", where whitespace is blanks, tabs,
977 // and line break characters.
978 for (end = s.length() - 1; end >= 0; end--) {
979 int ch = s.charAt(end);
980 if (ch != ' ' && ch != '\t' ) {
981 break;
982 }
983 }
984
985 // did we actually find something to remove? Shorten the String to the trimmed length
986 if (end != s.length() - 1) {
987 s = s.substring(0, end + 1);
988 }
989
990 // does the string as it exists now not require folding? We can just had that back right off.
991 if (s.length() + used <= FOLD_THRESHOLD) {
992 return s;
993 }
994
995 // get a buffer for the length of the string, plus room for a few line breaks.
996 // these are soft line breaks, so we generally need more that just the line breaks (an escape +
997 // CR + LF + leading space on next line);
998 StringBuffer newString = new StringBuffer(s.length() + 8);
999
1000
1001 // now keep chopping this down until we've accomplished what we need.
1002 while (used + s.length() > FOLD_THRESHOLD) {
1003 int breakPoint = -1;
1004 char breakChar = 0;
1005
1006 // now scan for the next place where we can break.
1007 for (int i = 0; i < s.length(); i++) {
1008 // have we passed the fold limit?
1009 if (used + i > FOLD_THRESHOLD) {
1010 // if we've already seen a blank, then stop now. Otherwise
1011 // we keep going until we hit a fold point.
1012 if (breakPoint != -1) {
1013 break;
1014 }
1015 }
1016 char ch = s.charAt(i);
1017
1018 // a white space character?
1019 if (ch == ' ' || ch == '\t') {
1020 // this might be a run of white space, so skip over those now.
1021 breakPoint = i;
1022 // we need to maintain the same character type after the inserted linebreak.
1023 breakChar = ch;
1024 i++;
1025 while (i < s.length()) {
1026 ch = s.charAt(i);
1027 if (ch != ' ' && ch != '\t') {
1028 break;
1029 }
1030 i++;
1031 }
1032 }
1033 // found an embedded new line. Escape this so that the unfolding process preserves it.
1034 else if (ch == '\n') {
1035 newString.append('\\');
1036 newString.append('\n');
1037 }
1038 else if (ch == '\r') {
1039 newString.append('\\');
1040 newString.append('\n');
1041 i++;
1042 // if this is a CRLF pair, add the second char also
1043 if (i < s.length() && s.charAt(i) == '\n') {
1044 newString.append('\r');
1045 }
1046 }
1047
1048 }
1049 // no fold point found, we punt, append the remainder and leave.
1050 if (breakPoint == -1) {
1051 newString.append(s);
1052 return newString.toString();
1053 }
1054 newString.append(s.substring(0, breakPoint));
1055 newString.append("\r\n");
1056 newString.append(breakChar);
1057 // chop the string
1058 s = s.substring(breakPoint + 1);
1059 // start again, and we've used the first char of the limit already with the whitespace char.
1060 used = 1;
1061 }
1062
1063 // add on the remainder, and return
1064 newString.append(s);
1065 return newString.toString();
1066 }
1067
1068 /**
1069 * Unfold a folded string. The unfolding process will remove
1070 * any line breaks that are not escaped and which are also followed
1071 * by whitespace characters.
1072 *
1073 * @param s The folded string.
1074 *
1075 * @return A new string with unfolding rules applied.
1076 */
1077 public static String unfold(String s) {
1078 // if folding is disable, unfolding is also. Return the string unchanged.
1079 if (!SessionUtil.getBooleanProperty(MIME_FOLDTEXT, true)) {
1080 return s;
1081 }
1082
1083 // if there are no line break characters in the string, we can just return this.
1084 if (s.indexOf('\n') < 0 && s.indexOf('\r') < 0) {
1085 return s;
1086 }
1087
1088 // we need to scan and fix things up.
1089 int length = s.length();
1090
1091 StringBuffer newString = new StringBuffer(length);
1092
1093 // scan the entire string
1094 for (int i = 0; i < length; i++) {
1095 char ch = s.charAt(i);
1096
1097 // we have a backslash. In folded strings, escape characters are only processed as such if
1098 // they preceed line breaks. Otherwise, we leave it be.
1099 if (ch == '\\') {
1100 // escape at the very end? Just add the character.
1101 if (i == length - 1) {
1102 newString.append(ch);
1103 }
1104 else {
1105 int nextChar = s.charAt(i + 1);
1106
1107 // naked newline? Add the new line to the buffer, and skip the escape char.
1108 if (nextChar == '\n') {
1109 newString.append('\n');
1110 i++;
1111 }
1112 else if (nextChar == '\r') {
1113 // just the CR left? Add it, removing the escape.
1114 if (i == length - 2 || s.charAt(i + 2) != '\r') {
1115 newString.append('\r');
1116 i++;
1117 }
1118 else {
1119 // toss the escape, add both parts of the CRLF, and skip over two chars.
1120 newString.append('\r');
1121 newString.append('\n');
1122 i += 2;
1123 }
1124 }
1125 else {
1126 // an escape for another purpose, just copy it over.
1127 newString.append(ch);
1128 }
1129 }
1130 }
1131 // we have an unescaped line break
1132 else if (ch == '\n' || ch == '\r') {
1133 // remember the position in case we need to backtrack.
1134 int lineBreak = i;
1135 boolean CRLF = false;
1136
1137 if (ch == '\r') {
1138 // check to see if we need to step over this.
1139 if (i < length - 1 && s.charAt(i + 1) == '\n') {
1140 i++;
1141 // flag the type so we know what we might need to preserve.
1142 CRLF = true;
1143 }
1144 }
1145
1146 // get a temp position scanner.
1147 int scan = i + 1;
1148
1149 // does a blank follow this new line? we need to scrap the new line and reduce the leading blanks
1150 // down to a single blank.
1151 if (scan < length && s.charAt(scan) == ' ') {
1152 // add the character
1153 newString.append(' ');
1154
1155 // scan over the rest of the blanks
1156 i = scan + 1;
1157 while (i < length && s.charAt(i) == ' ') {
1158 i++;
1159 }
1160 // we'll increment down below, so back up to the last blank as the current char.
1161 i--;
1162 }
1163 else {
1164 // we must keep this line break. Append the appropriate style.
1165 if (CRLF) {
1166 newString.append("\r\n");
1167 }
1168 else {
1169 newString.append(ch);
1170 }
1171 }
1172 }
1173 else {
1174 // just a normal, ordinary character
1175 newString.append(ch);
1176 }
1177 }
1178 return newString.toString();
1179 }
1180 }
1181
1182
1183 /**
1184 * Utility class for examining content information written out
1185 * by a DataHandler object. This stream gathers statistics on
1186 * the stream so it can make transfer encoding determinations.
1187 */
1188 class ContentCheckingOutputStream extends OutputStream {
1189 private int asciiChars = 0;
1190 private int nonAsciiChars = 0;
1191 private boolean containsLongLines = false;
1192 private boolean containsMalformedEOL = false;
1193 private int previousChar = 0;
1194 private int span = 0;
1195
1196 ContentCheckingOutputStream() {
1197 }
1198
1199 public void write(byte[] data) throws IOException {
1200 write(data, 0, data.length);
1201 }
1202
1203 public void write(byte[] data, int offset, int length) throws IOException {
1204 for (int i = 0; i < length; i++) {
1205 write(data[offset + i]);
1206 }
1207 }
1208
1209 public void write(int ch) {
1210 // we found a linebreak. Reset the line length counters on either one. We don't
1211 // really need to validate here.
1212 if (ch == '\n' || ch == '\r') {
1213 // we found a newline, this is only valid if the previous char was the '\r'
1214 if (ch == '\n') {
1215 // malformed linebreak? force this to base64 encoding.
1216 if (previousChar != '\r') {
1217 containsMalformedEOL = true;
1218 }
1219 }
1220 // hit a line end, reset our line length counter
1221 span = 0;
1222 }
1223 else {
1224 span++;
1225 // the text has long lines, we can't transfer this as unencoded text.
1226 if (span > 998) {
1227 containsLongLines = true;
1228 }
1229
1230 // non-ascii character, we have to transfer this in binary.
1231 if (!ASCIIUtil.isAscii(ch)) {
1232 nonAsciiChars++;
1233 }
1234 else {
1235 asciiChars++;
1236 }
1237 }
1238 previousChar = ch;
1239 }
1240
1241
1242 public String getBinaryTransferEncoding() {
1243 if (nonAsciiChars != 0 || containsLongLines || containsMalformedEOL) {
1244 return "base64";
1245 }
1246 else {
1247 return "7bit";
1248 }
1249 }
1250
1251 public String getTextTransferEncoding() {
1252 // looking good so far, only valid chars here.
1253 if (nonAsciiChars == 0) {
1254 // does this contain long text lines? We need to use a Q-P encoding which will
1255 // be only slightly longer, but handles folding the longer lines.
1256 if (containsLongLines) {
1257 return "quoted-printable";
1258 }
1259 else {
1260 // ideal! Easiest one to handle.
1261 return "7bit";
1262 }
1263 }
1264 else {
1265 // mostly characters requiring encoding? Base64 is our best bet.
1266 if (nonAsciiChars > asciiChars) {
1267 return "base64";
1268 }
1269 else {
1270 // Q-P encoding will use fewer bytes than the full Base64.
1271 return "quoted-printable";
1272 }
1273 }
1274 }
1275 }