1 /**
2 *
3 * Copyright 2003-2004 The Apache Software Foundation
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package javax.mail.internet;
19
20 import java.io.BufferedInputStream;
21 import java.io.BufferedReader;
22 import java.io.ByteArrayInputStream;
23 import java.io.ByteArrayOutputStream;
24 import java.io.IOException;
25 import java.io.InputStream;
26 import java.io.InputStreamReader;
27 import java.io.OutputStream;
28 import java.io.UnsupportedEncodingException;
29 import java.util.HashMap;
30 import java.util.Map;
31 import java.util.NoSuchElementException;
32 import java.util.StringTokenizer;
33
34 import javax.activation.DataHandler;
35 import javax.activation.DataSource;
36 import javax.mail.MessagingException;
37
38 import org.apache.geronimo.mail.util.ASCIIUtil;
39 import org.apache.geronimo.mail.util.Base64;
40 import org.apache.geronimo.mail.util.Base64DecoderStream;
41 import org.apache.geronimo.mail.util.Base64Encoder;
42 import org.apache.geronimo.mail.util.Base64EncoderStream;
43 import org.apache.geronimo.mail.util.QuotedPrintableDecoderStream;
44 import org.apache.geronimo.mail.util.QuotedPrintableEncoderStream;
45 import org.apache.geronimo.mail.util.QuotedPrintableEncoder;
46 import org.apache.geronimo.mail.util.QuotedPrintable;
47 import org.apache.geronimo.mail.util.SessionUtil;
48 import org.apache.geronimo.mail.util.UUDecoderStream;
49 import org.apache.geronimo.mail.util.UUEncoderStream;
50
51
52
53
54 /**
55 * @version $Rev: 412426 $ $Date: 2006-06-07 08:21:46 -0700 (Wed, 07 Jun 2006) $
56 */
57 public class MimeUtility {
58
// Property names looked up through SessionUtil by the encoding and decoding helpers.
private static final String MIME_FOLDENCODEDWORDS = "mail.mime.foldencodedwords";
private static final String MIME_DECODE_TEXT_STRICT = "mail.mime.decodetext.strict";

// Holder for static helpers only; the private constructor prevents instantiation.
private MimeUtility() {
}

public static final int ALL = -1;

// NOTE(review): not read or written anywhere else in this file.
private static String defaultJavaCharset;
// Characters that quoteAndEscapeString() prefixes with a backslash.
private static final String escapedChars = "\"\\\r\n";
// Characters that separate tokens when header text is decoded.
private static final String linearWhiteSpace = " \t\r\n";

// Character sets passed to the quoted-printable encoder: the first when encoding
// a "word", the second when encoding "text".
private static final String QP_WORD_SPECIALS = "=_?\"#$%&'(),.:;<>@[\\]^`{|}~";
private static final String QP_TEXT_SPECIALS = "=_?";

// Character set name translation tables; created by loadCharacterSetMappings(),
// which is why they cannot be final.
private static Map java2mime;
private static Map mime2java;

static {
    // Fill the translation tables once, when the class is initialized.
    loadCharacterSetMappings();
}
83
84 public static InputStream decode(InputStream in, String encoding) throws MessagingException {
85 encoding = encoding.toLowerCase();
86
87
88 if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
89 return in;
90 }
91 else if (encoding.equals("base64")) {
92 return new Base64DecoderStream(in);
93 }
94
95 else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
96 return new UUDecoderStream(in);
97 }
98 else if (encoding.equals("quoted-printable")) {
99 return new QuotedPrintableDecoderStream(in);
100 }
101 else {
102 throw new MessagingException("Unknown encoding " + encoding);
103 }
104 }
105
106 /**
107 * Decode a string of text obtained from a mail header into
108 * it's proper form. The text generally will consist of a
109 * string of tokens, some of which may be encoded using
110 * base64 encoding.
111 *
112 * @param text The text to decode.
113 *
114 * @return The decoded test string.
115 * @exception UnsupportedEncodingException
116 */
117 public static String decodeText(String text) throws UnsupportedEncodingException {
118
119
120 if (text.indexOf("=?") < 0) {
121 return text;
122 }
123
124
125 if (!SessionUtil.getBooleanProperty(MIME_DECODE_TEXT_STRICT, true)) {
126 return decodeTextNonStrict(text);
127 }
128
129 int offset = 0;
130 int endOffset = text.length();
131
132 int startWhiteSpace = -1;
133 int endWhiteSpace = -1;
134
135 StringBuffer decodedText = new StringBuffer(text.length());
136
137 boolean previousTokenEncoded = false;
138
139 while (offset < endOffset) {
140 char ch = text.charAt(offset);
141
142
143 if (linearWhiteSpace.indexOf(ch) != -1) {
144 startWhiteSpace = offset;
145 while (offset < endOffset) {
146
147 ch = text.charAt(offset);
148 if (linearWhiteSpace.indexOf(ch) != -1) {
149 offset++;
150 }
151 else {
152
153
154 endWhiteSpace = offset;
155 break;
156 }
157 }
158 }
159 else {
160
161 int wordStart = offset;
162
163 while (offset < endOffset) {
164
165 ch = text.charAt(offset);
166 if (linearWhiteSpace.indexOf(ch) == -1) {
167 offset++;
168 }
169 else {
170 break;
171 }
172
173
174 }
175
176 String word = text.substring(wordStart, offset);
177
178 if (word.startsWith("=?")) {
179 try {
180
181 String decodedWord = decodeWord(word);
182
183
184 if (!previousTokenEncoded) {
185 if (startWhiteSpace != -1) {
186 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
187 startWhiteSpace = -1;
188 }
189 }
190
191 previousTokenEncoded = true;
192
193 decodedText.append(decodedWord);
194
195
196 continue;
197
198 } catch (ParseException e) {
199 }
200 }
201
202
203 if (startWhiteSpace != -1) {
204 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
205 startWhiteSpace = -1;
206 }
207
208 previousTokenEncoded = false;
209 decodedText.append(word);
210 }
211 }
212
213 return decodedText.toString();
214 }
215
216
217 /**
218 * Decode a string of text obtained from a mail header into
219 * it's proper form. The text generally will consist of a
220 * string of tokens, some of which may be encoded using
221 * base64 encoding. This is for non-strict decoded for mailers that
222 * violate the RFC 2047 restriction that decoded tokens must be delimited
223 * by linear white space. This will scan tokens looking for inner tokens
224 * enclosed in "=?" -- "?=" pairs.
225 *
226 * @param text The text to decode.
227 *
228 * @return The decoded test string.
229 * @exception UnsupportedEncodingException
230 */
231 private static String decodeTextNonStrict(String text) throws UnsupportedEncodingException {
232 int offset = 0;
233 int endOffset = text.length();
234
235 int startWhiteSpace = -1;
236 int endWhiteSpace = -1;
237
238 StringBuffer decodedText = new StringBuffer(text.length());
239
240 boolean previousTokenEncoded = false;
241
242 while (offset < endOffset) {
243 char ch = text.charAt(offset);
244
245
246 if (linearWhiteSpace.indexOf(ch) != -1) {
247 startWhiteSpace = offset;
248 while (offset < endOffset) {
249
250 ch = text.charAt(offset);
251 if (linearWhiteSpace.indexOf(ch) != -1) {
252 offset++;
253 }
254 else {
255
256
257 endWhiteSpace = offset;
258 break;
259 }
260 }
261 }
262 else {
263
264 int wordStart = offset;
265
266 while (offset < endOffset) {
267
268 ch = text.charAt(offset);
269 if (linearWhiteSpace.indexOf(ch) == -1) {
270 offset++;
271 }
272 else {
273 break;
274 }
275
276
277 }
278
279 String word = text.substring(wordStart, offset);
280
281 int decodeStart = 0;
282
283
284 while (decodeStart < word.length()) {
285 int tokenStart = word.indexOf("=?", decodeStart);
286 if (tokenStart == -1) {
287
288
289 if (startWhiteSpace != -1) {
290 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
291 startWhiteSpace = -1;
292 }
293
294 previousTokenEncoded = false;
295 decodedText.append(word.substring(decodeStart));
296
297 break;
298 }
299
300 else {
301
302 if (tokenStart != decodeStart) {
303
304
305 if (startWhiteSpace != -1) {
306 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
307 startWhiteSpace = -1;
308 }
309
310 previousTokenEncoded = false;
311 decodedText.append(word.substring(decodeStart, tokenStart));
312 }
313
314
315 int tokenEnd = word.indexOf("?=", tokenStart);
316
317 if (tokenEnd == -1) {
318
319
320 if (startWhiteSpace != -1) {
321 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
322 startWhiteSpace = -1;
323 }
324
325 previousTokenEncoded = false;
326 decodedText.append(word.substring(tokenStart));
327
328 break;
329 }
330 else {
331
332 decodeStart = tokenEnd + 2;
333
334 String token = word.substring(tokenStart, tokenEnd);
335 try {
336
337 String decodedWord = decodeWord(token);
338
339
340 if (!previousTokenEncoded) {
341 if (startWhiteSpace != -1) {
342 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
343 startWhiteSpace = -1;
344 }
345 }
346
347 previousTokenEncoded = true;
348
349 decodedText.append(decodedWord);
350
351
352 continue;
353
354 } catch (ParseException e) {
355 }
356
357
358 if (startWhiteSpace != -1) {
359 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
360 startWhiteSpace = -1;
361 }
362
363 previousTokenEncoded = false;
364 decodedText.append(token);
365 }
366 }
367 }
368 }
369 }
370
371 return decodedText.toString();
372 }
373
374 /**
375 * Parse a string using the RFC 2047 rules for an "encoded-word"
376 * type. This encoding has the syntax:
377 *
378 * encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
379 *
380 * @param word The possibly encoded word value.
381 *
382 * @return The decoded word.
383 * @exception ParseException
384 * @exception UnsupportedEncodingException
385 */
386 public static String decodeWord(String word) throws ParseException, UnsupportedEncodingException {
387
388
389
390 if (!word.startsWith("=?")) {
391 throw new ParseException("Invalid RFC 2047 encoded-word: " + word);
392 }
393
394 int charsetPos = word.indexOf('?', 2);
395 if (charsetPos == -1) {
396 throw new ParseException("Missing charset in RFC 2047 encoded-word: " + word);
397 }
398
399
400 String charset = word.substring(2, charsetPos).toLowerCase();
401
402
403 int encodingPos = word.indexOf('?', charsetPos + 1);
404 if (encodingPos == -1) {
405 throw new ParseException("Missing encoding in RFC 2047 encoded-word: " + word);
406 }
407
408 String encoding = word.substring(charsetPos + 1, encodingPos);
409
410
411 int encodedTextPos = word.indexOf("?=", encodingPos + 1);
412 if (encodedTextPos == -1) {
413 throw new ParseException("Missing encoded text in RFC 2047 encoded-word: " + word);
414 }
415
416 String encodedText = word.substring(encodingPos + 1, encodedTextPos);
417
418
419 if (encodedText.length() == 0) {
420 return "";
421 }
422
423 try {
424
425 ByteArrayOutputStream out = new ByteArrayOutputStream(encodedText.length());
426
427 byte[] encodedData = encodedText.getBytes("US-ASCII");
428
429
430 if (encoding.equals("B")) {
431 Base64.decode(encodedData, out);
432 }
433
434 else if (encoding.equals("Q")) {
435 QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder();
436 dataEncoder.decodeWord(encodedData, out);
437 }
438 else {
439 throw new UnsupportedEncodingException("Unknown RFC 2047 encoding: " + encoding);
440 }
441
442 byte[] decodedData = out.toByteArray();
443 return new String(decodedData, javaCharset(charset));
444 } catch (IOException e) {
445 throw new UnsupportedEncodingException("Invalid RFC 2047 encoding");
446 }
447
448 }
449
450 /**
451 * Wrap an encoder around a given output stream.
452 *
453 * @param out The output stream to wrap.
454 * @param encoding The name of the encoding.
455 *
456 * @return A instance of FilterOutputStream that manages on the fly
457 * encoding for the requested encoding type.
458 * @exception MessagingException
459 */
460 public static OutputStream encode(OutputStream out, String encoding) throws MessagingException {
461
462 if (encoding == null) {
463 return out;
464 }
465
466 encoding = encoding.toLowerCase();
467
468
469 if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
470 return out;
471 }
472 else if (encoding.equals("base64")) {
473 return new Base64EncoderStream(out);
474 }
475
476 else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
477 return new UUEncoderStream(out);
478 }
479 else if (encoding.equals("quoted-printable")) {
480 return new QuotedPrintableEncoderStream(out);
481 }
482 else {
483 throw new MessagingException("Unknown encoding " + encoding);
484 }
485 }
486
487 /**
488 * Wrap an encoder around a given output stream.
489 *
490 * @param out The output stream to wrap.
491 * @param encoding The name of the encoding.
492 * @param filename The filename of the data being sent (only used for UUEncode).
493 *
494 * @return A instance of FilterOutputStream that manages on the fly
495 * encoding for the requested encoding type.
496 * @exception MessagingException
497 */
498 public static OutputStream encode(OutputStream out, String encoding, String filename) throws MessagingException {
499 encoding = encoding.toLowerCase();
500
501
502 if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
503 return out;
504 }
505 else if (encoding.equals("base64")) {
506 return new Base64EncoderStream(out);
507 }
508
509 else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
510 return new UUEncoderStream(out, filename);
511 }
512 else if (encoding.equals("quoted-printable")) {
513 return new QuotedPrintableEncoderStream(out);
514 }
515 else {
516 throw new MessagingException("Unknown encoding " + encoding);
517 }
518 }
519
520
521 public static String encodeText(String word) throws UnsupportedEncodingException {
522 return encodeText(word, null, null);
523 }
524
525 public static String encodeText(String word, String charset, String encoding) throws UnsupportedEncodingException {
526 return encodeWord(word, charset, encoding, false);
527 }
528
529 public static String encodeWord(String word) throws UnsupportedEncodingException {
530 return encodeWord(word, null, null);
531 }
532
533 public static String encodeWord(String word, String charset, String encoding) throws UnsupportedEncodingException {
534 return encodeWord(word, charset, encoding, true);
535 }
536
537
538 private static String encodeWord(String word, String charset, String encoding, boolean encodingWord) throws UnsupportedEncodingException {
539
540
541 String encoder = ASCIIUtil.getTextTransferEncoding(word);
542
543 if (encoder.equals("7bit")) {
544 return word;
545 }
546
547
548 if (charset == null) {
549 charset = getDefaultMIMECharset();
550 }
551
552
553 if (encoding != null) {
554 if (encoding.equalsIgnoreCase("B")) {
555 encoder = "base64";
556 }
557 else if (encoding.equalsIgnoreCase("Q")) {
558 encoder = "quoted-printable";
559 }
560 else {
561 throw new UnsupportedEncodingException("Unknown transfer encoding: " + encoding);
562 }
563 }
564
565 try {
566
567 InputStream in = new ByteArrayInputStream(word.getBytes( javaCharset(charset)));
568 ByteArrayOutputStream out = new ByteArrayOutputStream();
569
570 if (encoder.equals("base64")) {
571 Base64Encoder dataEncoder = new Base64Encoder();
572 dataEncoder.encodeWord(in, charset, out, SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false));
573 }
574 else {
575 QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder();
576 dataEncoder.encodeWord(in, charset, encodingWord ? QP_WORD_SPECIALS : QP_TEXT_SPECIALS, out, SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false));
577 }
578
579 byte[] bytes = out.toByteArray();
580 return new String(bytes);
581 } catch (IOException e) {
582 throw new UnsupportedEncodingException("Invalid encoding");
583 }
584 }
585
586
587 /**
588 * Examine the content of a data source and decide what type
589 * of transfer encoding should be used. For text streams,
590 * we'll decided between 7bit, quoted-printable, and base64.
591 * For binary content types, we'll use either 7bit or base64.
592 *
593 * @param handler The DataHandler associated with the content.
594 *
595 * @return The string name of an encoding used to transfer the content.
596 */
597 public static String getEncoding(DataHandler handler) {
598
599
600
601
602
603 DataSource ds = handler.getDataSource();
604 if (ds != null) {
605 return getEncoding(ds);
606 }
607
608 try {
609
610 ContentType content = new ContentType(ds.getContentType());
611
612
613
614
615 ContentCheckingOutputStream checker = new ContentCheckingOutputStream();
616
617 handler.writeTo(checker);
618
619
620 if (content.match("text/*")) {
621 return checker.getTextTransferEncoding();
622 }
623 else {
624 return checker.getBinaryTransferEncoding();
625 }
626
627 } catch (Exception e) {
628
629 return "base64";
630 }
631 }
632
633
634 /**
635 * Determine the what transfer encoding should be used for
636 * data retrieved from a DataSource.
637 *
638 * @param source The DataSource for the transmitted data.
639 *
640 * @return The string name of the encoding form that should be used for
641 * the data.
642 */
643 public static String getEncoding(DataSource source) {
644 InputStream in = null;
645
646 try {
647
648 ContentType content = new ContentType(source.getContentType());
649
650
651 in = source.getInputStream();
652
653 if (!content.match("text/*")) {
654
655
656 return ASCIIUtil.getBinaryTransferEncoding(in);
657 }
658 else {
659 return ASCIIUtil.getTextTransferEncoding(in);
660 }
661 } catch (Exception e) {
662
663
664 return "base64";
665 } finally {
666
667 try {
668 if (in != null) {
669 in.close();
670 }
671 } catch (IOException e) {
672 }
673 }
674 }
675
676
677 /**
678 * Quote a "word" value. If the word contains any character from
679 * the specified "specials" list, this value is returned as a
680 * quoted strong. Otherwise, it is returned unchanged (an "atom").
681 *
682 * @param word The word requiring quoting.
683 * @param specials The set of special characters that can't appear in an unquoted
684 * string.
685 *
686 * @return The quoted value. This will be unchanged if the word doesn't contain
687 * any of the designated special characters.
688 */
689 public static String quote(String word, String specials) {
690 int wordLength = word.length();
691 boolean requiresQuoting = false;
692
693 for (int i =0; i < wordLength; i++) {
694 char ch = word.charAt(i);
695
696 if (escapedChars.indexOf(ch) >= 0) {
697 return quoteAndEscapeString(word);
698 }
699
700 if (ch < 32 || ch >= 127 || specials.indexOf(ch) >= 0) {
701
702
703 return quoteAndEscapeString(word);
704 }
705 }
706 return word;
707 }
708
709 /**
710 * Take a string and return it as a formatted quoted string, with
711 * all characters requiring escaping handled properly.
712 *
713 * @param word The string to quote.
714 *
715 * @return The quoted string.
716 */
717 private static String quoteAndEscapeString(String word) {
718 int wordLength = word.length();
719
720 StringBuffer buffer = new StringBuffer(wordLength + 10);
721
722 buffer.append('"');
723
724 for (int i = 0; i < wordLength; i++) {
725 char ch = word.charAt(i);
726
727 if (escapedChars.indexOf(ch) >= 0) {
728
729 buffer.append('\\');
730 }
731 buffer.append(ch);
732 }
733
734 buffer.append('"');
735 return buffer.toString();
736 }
737
738 /**
739 * Translate a MIME standard character set name into the Java
740 * equivalent.
741 *
742 * @param charset The MIME standard name.
743 *
744 * @return The Java equivalent for this name.
745 */
746 public static String javaCharset(String charset) {
747
748 if (charset == null) {
749 return null;
750 }
751
752 String mappedCharset = (String)mime2java.get(charset.toLowerCase());
753
754
755 return mappedCharset == null ? charset : mappedCharset;
756 }
757
758 /**
759 * Map a Java character set name into the MIME equivalent.
760 *
761 * @param charset The java character set name.
762 *
763 * @return The MIME standard equivalent for this character set name.
764 */
765 public static String mimeCharset(String charset) {
766
767 if (charset == null) {
768 return null;
769 }
770
771 String mappedCharset = (String)java2mime.get(charset.toLowerCase());
772
773
774 return mappedCharset == null ? charset : mappedCharset;
775 }
776
777
778 /**
779 * Get the default character set to use, in Java name format.
780 * This either be the value set with the mail.mime.charset
781 * system property or obtained from the file.encoding system
782 * property. If neither of these is set, we fall back to
783 * 8859_1 (basically US-ASCII).
784 *
785 * @return The character string value of the default character set.
786 */
787 public static String getDefaultJavaCharset() {
788 String charset = SessionUtil.getProperty("mail.mime.charset");
789 if (charset != null) {
790 return javaCharset(charset);
791 }
792 return SessionUtil.getProperty("file.encoding", "8859_1");
793 }
794
795 /**
796 * Get the default character set to use, in MIME name format.
797 * This either be the value set with the mail.mime.charset
798 * system property or obtained from the file.encoding system
799 * property. If neither of these is set, we fall back to
800 * 8859_1 (basically US-ASCII).
801 *
802 * @return The character string value of the default character set.
803 */
804 static String getDefaultMIMECharset() {
805
806 String charset = SessionUtil.getProperty("mail.mime.charset");
807 if (charset != null) {
808 return charset;
809 }
810
811
812 return mimeCharset(SessionUtil.getProperty("file.encoding", "8859_1"));
813 }
814
815
816 /**
817 * Load the default mapping tables used by the javaCharset()
818 * and mimeCharset() methods. By default, these tables are
819 * loaded from the /META-INF/javamail.charset.map file. If
820 * something goes wrong loading that file, we configure things
821 * with a default mapping table (which just happens to mimic
822 * what's in the default mapping file).
823 */
824 static private void loadCharacterSetMappings() {
825 java2mime = new HashMap();
826 mime2java = new HashMap();
827
828
829
830 try {
831 InputStream map = javax.mail.internet.MimeUtility.class.getResourceAsStream("/META-INF/javamail.charset.map");
832
833 if (map != null) {
834
835 BufferedReader reader = new BufferedReader(new InputStreamReader(map));
836
837 readMappings(reader, java2mime);
838 readMappings(reader, mime2java);
839 }
840 } catch (Exception e) {
841 }
842
843
844
845
846
847
848 if (java2mime.isEmpty()) {
849 java2mime.put("8859_1", "ISO-8859-1");
850 java2mime.put("iso8859_1", "ISO-8859-1");
851 java2mime.put("iso8859-1", "ISO-8859-1");
852
853 java2mime.put("8859_2", "ISO-8859-2");
854 java2mime.put("iso8859_2", "ISO-8859-2");
855 java2mime.put("iso8859-2", "ISO-8859-2");
856
857 java2mime.put("8859_3", "ISO-8859-3");
858 java2mime.put("iso8859_3", "ISO-8859-3");
859 java2mime.put("iso8859-3", "ISO-8859-3");
860
861 java2mime.put("8859_4", "ISO-8859-4");
862 java2mime.put("iso8859_4", "ISO-8859-4");
863 java2mime.put("iso8859-4", "ISO-8859-4");
864
865 java2mime.put("8859_5", "ISO-8859-5");
866 java2mime.put("iso8859_5", "ISO-8859-5");
867 java2mime.put("iso8859-5", "ISO-8859-5");
868
869 java2mime.put ("8859_6", "ISO-8859-6");
870 java2mime.put("iso8859_6", "ISO-8859-6");
871 java2mime.put("iso8859-6", "ISO-8859-6");
872
873 java2mime.put("8859_7", "ISO-8859-7");
874 java2mime.put("iso8859_7", "ISO-8859-7");
875 java2mime.put("iso8859-7", "ISO-8859-7");
876
877 java2mime.put("8859_8", "ISO-8859-8");
878 java2mime.put("iso8859_8", "ISO-8859-8");
879 java2mime.put("iso8859-8", "ISO-8859-8");
880
881 java2mime.put("8859_9", "ISO-8859-9");
882 java2mime.put("iso8859_9", "ISO-8859-9");
883 java2mime.put("iso8859-9", "ISO-8859-9");
884
885 java2mime.put("sjis", "Shift_JIS");
886 java2mime.put ("jis", "ISO-2022-JP");
887 java2mime.put("iso2022jp", "ISO-2022-JP");
888 java2mime.put("euc_jp", "euc-jp");
889 java2mime.put("koi8_r", "koi8-r");
890 java2mime.put("euc_cn", "euc-cn");
891 java2mime.put("euc_tw", "euc-tw");
892 java2mime.put("euc_kr", "euc-kr");
893 }
894
895 if (mime2java.isEmpty ()) {
896 mime2java.put("iso-2022-cn", "ISO2022CN");
897 mime2java.put("iso-2022-kr", "ISO2022KR");
898 mime2java.put("utf-8", "UTF8");
899 mime2java.put("utf8", "UTF8");
900 mime2java.put("ja_jp.iso2022-7", "ISO2022JP");
901 mime2java.put("ja_jp.eucjp", "EUCJIS");
902 mime2java.put ("euc-kr", "KSC5601");
903 mime2java.put("euckr", "KSC5601");
904 mime2java.put("us-ascii", "ISO-8859-1");
905 mime2java.put("x-us-ascii", "ISO-8859-1");
906 }
907 }
908
909
910 /**
911 * Read a section of a character map table and populate the
912 * target mapping table with the information. The table end
913 * is marked by a line starting with "--" and also ending with
914 * "--". Blank lines and comment lines (beginning with '#') are
915 * ignored.
916 *
917 * @param reader The source of the file information.
918 * @param table The mapping table used to store the information.
919 */
920 static private void readMappings(BufferedReader reader, Map table) throws IOException {
921
922 while (true) {
923 String line = reader.readLine();
924
925 if (line == null) {
926 return;
927 }
928
929
930 line = line.trim();
931
932 if (line.length() == 0 || line.startsWith("#")) {
933 continue;
934 }
935
936
937 if (line.startsWith("--") && line.endsWith("--")) {
938 return;
939 }
940
941
942 StringTokenizer tokenizer = new StringTokenizer(line, " \t");
943
944 try {
945 String from = tokenizer.nextToken().toLowerCase();
946 String to = tokenizer.nextToken();
947
948 table.put(from, to);
949 } catch (NoSuchElementException e) {
950
951 }
952 }
953 }
954
955
956 }
957
958
959 /**
960 * Utility class for examining content information written out
961 * by a DataHandler object. This stream gathers statistics on
962 * the stream so it can make transfer encoding determinations.
963 */
964 class ContentCheckingOutputStream extends OutputStream {
965 private int asciiChars = 0;
966 private int nonAsciiChars = 0;
967 private boolean containsLongLines = false;
968 private boolean containsMalformedEOL = false;
969 private int previousChar = 0;
970 private int span = 0;
971
972 ContentCheckingOutputStream() {
973 }
974
975 public void write(byte[] data) throws IOException {
976 write(data, 0, data.length);
977 }
978
979 public void write(byte[] data, int offset, int length) throws IOException {
980 for (int i = 0; i < length; i++) {
981 write(data[offset + i]);
982 }
983 }
984
985 public void write(int ch) {
986
987
988 if (ch == '\n' || ch == '\r') {
989
990 if (ch == '\n') {
991
992 if (previousChar != '\r') {
993 containsMalformedEOL = true;
994 }
995 }
996
997 span = 0;
998 }
999 else {
1000 span++;
1001
1002 if (span > 998) {
1003 containsLongLines = true;
1004 }
1005
1006
1007 if (!ASCIIUtil.isAscii(ch)) {
1008 nonAsciiChars++;
1009 }
1010 else {
1011 asciiChars++;
1012 }
1013 }
1014 previousChar = ch;
1015 }
1016
1017
1018 public String getBinaryTransferEncoding() {
1019 if (nonAsciiChars != 0 || containsLongLines || containsMalformedEOL) {
1020 return "base64";
1021 }
1022 else {
1023 return "7bit";
1024 }
1025 }
1026
1027 public String getTextTransferEncoding() {
1028
1029 if (nonAsciiChars == 0) {
1030
1031
1032 if (containsLongLines) {
1033 return "quoted-printable";
1034 }
1035 else {
1036
1037 return "7bit";
1038 }
1039 }
1040 else {
1041
1042 if (nonAsciiChars > asciiChars) {
1043 return "base64";
1044 }
1045 else {
1046
1047 return "quoted-printable";
1048 }
1049 }
1050 }
1051 }