Clover coverage report - Maven Clover report
Coverage timestamp: Sun Aug 20 2006 04:01:04 PDT
file stats: LOC: 1,051   Methods: 28
NCLOC: 599   Classes: 2
 
 Source file Conditionals Statements Methods TOTAL
MimeUtility.java 45.3% 45.8% 64.3% 46.6%
coverage coverage
 1    /**
 2    *
 3    * Copyright 2003-2004 The Apache Software Foundation
 4    *
 5    * Licensed under the Apache License, Version 2.0 (the "License");
 6    * you may not use this file except in compliance with the License.
 7    * You may obtain a copy of the License at
 8    *
 9    * http://www.apache.org/licenses/LICENSE-2.0
 10    *
 11    * Unless required by applicable law or agreed to in writing, software
 12    * distributed under the License is distributed on an "AS IS" BASIS,
 13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14    * See the License for the specific language governing permissions and
 15    * limitations under the License.
 16    */
 17   
 18    package javax.mail.internet;
 19   
 20    import java.io.BufferedInputStream;
 21    import java.io.BufferedReader;
 22    import java.io.ByteArrayInputStream;
 23    import java.io.ByteArrayOutputStream;
 24    import java.io.IOException;
 25    import java.io.InputStream;
 26    import java.io.InputStreamReader;
 27    import java.io.OutputStream;
 28    import java.io.UnsupportedEncodingException;
 29    import java.util.HashMap;
 30    import java.util.Map;
 31    import java.util.NoSuchElementException;
 32    import java.util.StringTokenizer;
 33   
 34    import javax.activation.DataHandler;
 35    import javax.activation.DataSource;
 36    import javax.mail.MessagingException;
 37   
 38    import org.apache.geronimo.mail.util.ASCIIUtil;
 39    import org.apache.geronimo.mail.util.Base64;
 40    import org.apache.geronimo.mail.util.Base64DecoderStream;
 41    import org.apache.geronimo.mail.util.Base64Encoder;
 42    import org.apache.geronimo.mail.util.Base64EncoderStream;
 43    import org.apache.geronimo.mail.util.QuotedPrintableDecoderStream;
 44    import org.apache.geronimo.mail.util.QuotedPrintableEncoderStream;
 45    import org.apache.geronimo.mail.util.QuotedPrintableEncoder;
 46    import org.apache.geronimo.mail.util.QuotedPrintable;
 47    import org.apache.geronimo.mail.util.SessionUtil;
 48    import org.apache.geronimo.mail.util.UUDecoderStream;
 49    import org.apache.geronimo.mail.util.UUEncoderStream;
 50   
 51    // Supported transfer encodings include "base64", "quoted-printable", "7bit", "8bit" and "binary".
 52    // In addition, "uuencode" (also accepted as "x-uuencode" and "x-uue") is supported.
 53   
 54    /**
 55    * @version $Rev: 412426 $ $Date: 2006-06-07 08:21:46 -0700 (Wed, 07 Jun 2006) $
 56    */
 57    public class MimeUtility {
 58   
 59    private static final String MIME_FOLDENCODEDWORDS = "mail.mime.foldencodedwords";
 60    private static final String MIME_DECODE_TEXT_STRICT = "mail.mime.decodetext.strict";
 61   
 62  0 private MimeUtility() {
 63    }
 64   
 65    public static final int ALL = -1;
 66   
 67    private static String defaultJavaCharset;
 68    private static String escapedChars = "\"\\\r\n";
 69    private static String linearWhiteSpace = " \t\r\n";
 70   
 71    private static String QP_WORD_SPECIALS = "=_?\"#$%&'(),.:;<>@[\\]^`{|}~";
 72    private static String QP_TEXT_SPECIALS = "=_?";
 73   
 74    // the javamail spec includes the ability to map java encoding names to MIME-specified names. Normally,
 75    // these values are loaded from a character mapping file.
 76    private static Map java2mime;
 77    private static Map mime2java;
 78   
 79    static {
 80    // we need to load the mapping tables used by javaCharset() and mimeCharset().
 81  1 loadCharacterSetMappings();
 82    }
 83   
 84  16 public static InputStream decode(InputStream in, String encoding) throws MessagingException {
 85  16 encoding = encoding.toLowerCase();
 86   
 87    // some encodies are just pass-throughs, with no real decoding.
 88  16 if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
 89  4 return in;
 90    }
 91  12 else if (encoding.equals("base64")) {
 92  4 return new Base64DecoderStream(in);
 93    }
 94    // UUEncode is known by a couple historical extension names too.
 95  8 else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
 96  5 return new UUDecoderStream(in);
 97    }
 98  3 else if (encoding.equals("quoted-printable")) {
 99  3 return new QuotedPrintableDecoderStream(in);
 100    }
 101    else {
 102  0 throw new MessagingException("Unknown encoding " + encoding);
 103    }
 104    }
 105   
 106    /**
 107    * Decode a string of text obtained from a mail header into
 108    * it's proper form. The text generally will consist of a
 109    * string of tokens, some of which may be encoded using
 110    * base64 encoding.
 111    *
 112    * @param text The text to decode.
 113    *
 114    * @return The decoded test string.
 115    * @exception UnsupportedEncodingException
 116    */
 117  9 public static String decodeText(String text) throws UnsupportedEncodingException {
 118    // if the text contains any encoded tokens, those tokens will be marked with "=?". If the
 119    // source string doesn't contain that sequent, no decoding is required.
 120  9 if (text.indexOf("=?") < 0) {
 121  3 return text;
 122    }
 123   
 124    // we have two sets of rules we can apply.
 125  6 if (!SessionUtil.getBooleanProperty(MIME_DECODE_TEXT_STRICT, true)) {
 126  0 return decodeTextNonStrict(text);
 127    }
 128   
 129  6 int offset = 0;
 130  6 int endOffset = text.length();
 131   
 132  6 int startWhiteSpace = -1;
 133  6 int endWhiteSpace = -1;
 134   
 135  6 StringBuffer decodedText = new StringBuffer(text.length());
 136   
 137  6 boolean previousTokenEncoded = false;
 138   
 139  6 while (offset < endOffset) {
 140  6 char ch = text.charAt(offset);
 141   
 142    // is this a whitespace character?
 143  6 if (linearWhiteSpace.indexOf(ch) != -1) {
 144  0 startWhiteSpace = offset;
 145  0 while (offset < endOffset) {
 146    // step over the white space characters.
 147  0 ch = text.charAt(offset);
 148  0 if (linearWhiteSpace.indexOf(ch) != -1) {
 149  0 offset++;
 150    }
 151    else {
 152    // record the location of the first non lwsp and drop down to process the
 153    // token characters.
 154  0 endWhiteSpace = offset;
 155  0 break;
 156    }
 157    }
 158    }
 159    else {
 160    // we have a word token. We need to scan over the word and then try to parse it.
 161  6 int wordStart = offset;
 162   
 163  6 while (offset < endOffset) {
 164    // step over the white space characters.
 165  201 ch = text.charAt(offset);
 166  201 if (linearWhiteSpace.indexOf(ch) == -1) {
 167  201 offset++;
 168    }
 169    else {
 170  0 break;
 171    }
 172   
 173    //NB: Trailing whitespace on these header strings will just be discarded.
 174    }
 175    // pull out the word token.
 176  6 String word = text.substring(wordStart, offset);
 177    // is the token encoded? decode the word
 178  6 if (word.startsWith("=?")) {
 179  6 try {
 180    // if this gives a parsing failure, treat it like a non-encoded word.
 181  6 String decodedWord = decodeWord(word);
 182   
 183    // are any whitespace characters significant? Append 'em if we've got 'em.
 184  6 if (!previousTokenEncoded) {
 185  6 if (startWhiteSpace != -1) {
 186  0 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
 187  0 startWhiteSpace = -1;
 188    }
 189    }
 190    // this is definitely a decoded token.
 191  6 previousTokenEncoded = true;
 192    // and add this to the text.
 193  6 decodedText.append(decodedWord);
 194    // we continue parsing from here...we allow parsing errors to fall through
 195    // and get handled as normal text.
 196  6 continue;
 197   
 198    } catch (ParseException e) {
 199    }
 200    }
 201    // this is a normal token, so it doesn't matter what the previous token was. Add the white space
 202    // if we have it.
 203  0 if (startWhiteSpace != -1) {
 204  0 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
 205  0 startWhiteSpace = -1;
 206    }
 207    // this is not a decoded token.
 208  0 previousTokenEncoded = false;
 209  0 decodedText.append(word);
 210    }
 211    }
 212   
 213  6 return decodedText.toString();
 214    }
 215   
 216   
 217    /**
 218    * Decode a string of text obtained from a mail header into
 219    * it's proper form. The text generally will consist of a
 220    * string of tokens, some of which may be encoded using
 221    * base64 encoding. This is for non-strict decoded for mailers that
 222    * violate the RFC 2047 restriction that decoded tokens must be delimited
 223    * by linear white space. This will scan tokens looking for inner tokens
 224    * enclosed in "=?" -- "?=" pairs.
 225    *
 226    * @param text The text to decode.
 227    *
 228    * @return The decoded test string.
 229    * @exception UnsupportedEncodingException
 230    */
 231  0 private static String decodeTextNonStrict(String text) throws UnsupportedEncodingException {
 232  0 int offset = 0;
 233  0 int endOffset = text.length();
 234   
 235  0 int startWhiteSpace = -1;
 236  0 int endWhiteSpace = -1;
 237   
 238  0 StringBuffer decodedText = new StringBuffer(text.length());
 239   
 240  0 boolean previousTokenEncoded = false;
 241   
 242  0 while (offset < endOffset) {
 243  0 char ch = text.charAt(offset);
 244   
 245    // is this a whitespace character?
 246  0 if (linearWhiteSpace.indexOf(ch) != -1) {
 247  0 startWhiteSpace = offset;
 248  0 while (offset < endOffset) {
 249    // step over the white space characters.
 250  0 ch = text.charAt(offset);
 251  0 if (linearWhiteSpace.indexOf(ch) != -1) {
 252  0 offset++;
 253    }
 254    else {
 255    // record the location of the first non lwsp and drop down to process the
 256    // token characters.
 257  0 endWhiteSpace = offset;
 258  0 break;
 259    }
 260    }
 261    }
 262    else {
 263    // we're at the start of a word token. We potentially need to break this up into subtokens
 264  0 int wordStart = offset;
 265   
 266  0 while (offset < endOffset) {
 267    // step over the white space characters.
 268  0 ch = text.charAt(offset);
 269  0 if (linearWhiteSpace.indexOf(ch) == -1) {
 270  0 offset++;
 271    }
 272    else {
 273  0 break;
 274    }
 275   
 276    //NB: Trailing whitespace on these header strings will just be discarded.
 277    }
 278    // pull out the word token.
 279  0 String word = text.substring(wordStart, offset);
 280   
 281  0 int decodeStart = 0;
 282   
 283    // now scan and process each of the bits within here.
 284  0 while (decodeStart < word.length()) {
 285  0 int tokenStart = word.indexOf("=?", decodeStart);
 286  0 if (tokenStart == -1) {
 287    // this is a normal token, so it doesn't matter what the previous token was. Add the white space
 288    // if we have it.
 289  0 if (startWhiteSpace != -1) {
 290  0 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
 291  0 startWhiteSpace = -1;
 292    }
 293    // this is not a decoded token.
 294  0 previousTokenEncoded = false;
 295  0 decodedText.append(word.substring(decodeStart));
 296    // we're finished.
 297  0 break;
 298    }
 299    // we have something to process
 300    else {
 301    // we might have a normal token preceeding this.
 302  0 if (tokenStart != decodeStart) {
 303    // this is a normal token, so it doesn't matter what the previous token was. Add the white space
 304    // if we have it.
 305  0 if (startWhiteSpace != -1) {
 306  0 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
 307  0 startWhiteSpace = -1;
 308    }
 309    // this is not a decoded token.
 310  0 previousTokenEncoded = false;
 311  0 decodedText.append(word.substring(decodeStart, tokenStart));
 312    }
 313   
 314    // now find the end marker.
 315  0 int tokenEnd = word.indexOf("?=", tokenStart);
 316    // sigh, an invalid token. Treat this as plain text.
 317  0 if (tokenEnd == -1) {
 318    // this is a normal token, so it doesn't matter what the previous token was. Add the white space
 319    // if we have it.
 320  0 if (startWhiteSpace != -1) {
 321  0 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
 322  0 startWhiteSpace = -1;
 323    }
 324    // this is not a decoded token.
 325  0 previousTokenEncoded = false;
 326  0 decodedText.append(word.substring(tokenStart));
 327    // we're finished.
 328  0 break;
 329    }
 330    else {
 331    // update our ticker
 332  0 decodeStart = tokenEnd + 2;
 333   
 334  0 String token = word.substring(tokenStart, tokenEnd);
 335  0 try {
 336    // if this gives a parsing failure, treat it like a non-encoded word.
 337  0 String decodedWord = decodeWord(token);
 338   
 339    // are any whitespace characters significant? Append 'em if we've got 'em.
 340  0 if (!previousTokenEncoded) {
 341  0 if (startWhiteSpace != -1) {
 342  0 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
 343  0 startWhiteSpace = -1;
 344    }
 345    }
 346    // this is definitely a decoded token.
 347  0 previousTokenEncoded = true;
 348    // and add this to the text.
 349  0 decodedText.append(decodedWord);
 350    // we continue parsing from here...we allow parsing errors to fall through
 351    // and get handled as normal text.
 352  0 continue;
 353   
 354    } catch (ParseException e) {
 355    }
 356    // this is a normal token, so it doesn't matter what the previous token was. Add the white space
 357    // if we have it.
 358  0 if (startWhiteSpace != -1) {
 359  0 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
 360  0 startWhiteSpace = -1;
 361    }
 362    // this is not a decoded token.
 363  0 previousTokenEncoded = false;
 364  0 decodedText.append(token);
 365    }
 366    }
 367    }
 368    }
 369    }
 370   
 371  0 return decodedText.toString();
 372    }
 373   
 374    /**
 375    * Parse a string using the RFC 2047 rules for an "encoded-word"
 376    * type. This encoding has the syntax:
 377    *
 378    * encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
 379    *
 380    * @param word The possibly encoded word value.
 381    *
 382    * @return The decoded word.
 383    * @exception ParseException
 384    * @exception UnsupportedEncodingException
 385    */
 386  9 public static String decodeWord(String word) throws ParseException, UnsupportedEncodingException {
 387    // encoded words start with the characters "=?". If this not an encoded word, we throw a
 388    // ParseException for the caller.
 389   
 390  9 if (!word.startsWith("=?")) {
 391  0 throw new ParseException("Invalid RFC 2047 encoded-word: " + word);
 392    }
 393   
 394  9 int charsetPos = word.indexOf('?', 2);
 395  9 if (charsetPos == -1) {
 396  0 throw new ParseException("Missing charset in RFC 2047 encoded-word: " + word);
 397    }
 398   
 399    // pull out the character set information (this is the MIME name at this point).
 400  9 String charset = word.substring(2, charsetPos).toLowerCase();
 401   
 402    // now pull out the encoding token the same way.
 403  9 int encodingPos = word.indexOf('?', charsetPos + 1);
 404  9 if (encodingPos == -1) {
 405  0 throw new ParseException("Missing encoding in RFC 2047 encoded-word: " + word);
 406    }
 407   
 408  9 String encoding = word.substring(charsetPos + 1, encodingPos);
 409   
 410    // and finally the encoded text.
 411  9 int encodedTextPos = word.indexOf("?=", encodingPos + 1);
 412  9 if (encodedTextPos == -1) {
 413  0 throw new ParseException("Missing encoded text in RFC 2047 encoded-word: " + word);
 414    }
 415   
 416  9 String encodedText = word.substring(encodingPos + 1, encodedTextPos);
 417   
 418    // seems a bit silly to encode a null string, but easy to deal with.
 419  9 if (encodedText.length() == 0) {
 420  0 return "";
 421    }
 422   
 423  9 try {
 424    // the decoder writes directly to an output stream.
 425  9 ByteArrayOutputStream out = new ByteArrayOutputStream(encodedText.length());
 426   
 427  9 byte[] encodedData = encodedText.getBytes("US-ASCII");
 428   
 429    // Base64 encoded?
 430  9 if (encoding.equals("B")) {
 431  2 Base64.decode(encodedData, out);
 432    }
 433    // maybe quoted printable.
 434  7 else if (encoding.equals("Q")) {
 435  7 QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder();
 436  7 dataEncoder.decodeWord(encodedData, out);
 437    }
 438    else {
 439  0 throw new UnsupportedEncodingException("Unknown RFC 2047 encoding: " + encoding);
 440    }
 441    // get the decoded byte data and convert into a string.
 442  9 byte[] decodedData = out.toByteArray();
 443  9 return new String(decodedData, javaCharset(charset));
 444    } catch (IOException e) {
 445  0 throw new UnsupportedEncodingException("Invalid RFC 2047 encoding");
 446    }
 447   
 448    }
 449   
 450    /**
 451    * Wrap an encoder around a given output stream.
 452    *
 453    * @param out The output stream to wrap.
 454    * @param encoding The name of the encoding.
 455    *
 456    * @return A instance of FilterOutputStream that manages on the fly
 457    * encoding for the requested encoding type.
 458    * @exception MessagingException
 459    */
 460  25 public static OutputStream encode(OutputStream out, String encoding) throws MessagingException {
 461    // no encoding specified, so assume it goes out unchanged.
 462  25 if (encoding == null) {
 463  5 return out;
 464    }
 465   
 466  20 encoding = encoding.toLowerCase();
 467   
 468    // some encodies are just pass-throughs, with no real decoding.
 469  20 if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
 470  8 return out;
 471    }
 472  12 else if (encoding.equals("base64")) {
 473  4 return new Base64EncoderStream(out);
 474    }
 475    // UUEncode is known by a couple historical extension names too.
 476  8 else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
 477  5 return new UUEncoderStream(out);
 478    }
 479  3 else if (encoding.equals("quoted-printable")) {
 480  3 return new QuotedPrintableEncoderStream(out);
 481    }
 482    else {
 483  0 throw new MessagingException("Unknown encoding " + encoding);
 484    }
 485    }
 486   
 487    /**
 488    * Wrap an encoder around a given output stream.
 489    *
 490    * @param out The output stream to wrap.
 491    * @param encoding The name of the encoding.
 492    * @param filename The filename of the data being sent (only used for UUEncode).
 493    *
 494    * @return A instance of FilterOutputStream that manages on the fly
 495    * encoding for the requested encoding type.
 496    * @exception MessagingException
 497    */
 498  0 public static OutputStream encode(OutputStream out, String encoding, String filename) throws MessagingException {
 499  0 encoding = encoding.toLowerCase();
 500   
 501    // some encodies are just pass-throughs, with no real decoding.
 502  0 if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
 503  0 return out;
 504    }
 505  0 else if (encoding.equals("base64")) {
 506  0 return new Base64EncoderStream(out);
 507    }
 508    // UUEncode is known by a couple historical extension names too.
 509  0 else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
 510  0 return new UUEncoderStream(out, filename);
 511    }
 512  0 else if (encoding.equals("quoted-printable")) {
 513  0 return new QuotedPrintableEncoderStream(out);
 514    }
 515    else {
 516  0 throw new MessagingException("Unknown encoding " + encoding);
 517    }
 518    }
 519   
 520   
 521  1 public static String encodeText(String word) throws UnsupportedEncodingException {
 522  1 return encodeText(word, null, null);
 523    }
 524   
 525  12 public static String encodeText(String word, String charset, String encoding) throws UnsupportedEncodingException {
 526  12 return encodeWord(word, charset, encoding, false);
 527    }
 528   
 529  66 public static String encodeWord(String word) throws UnsupportedEncodingException {
 530  66 return encodeWord(word, null, null);
 531    }
 532   
 533  68 public static String encodeWord(String word, String charset, String encoding) throws UnsupportedEncodingException {
 534  68 return encodeWord(word, charset, encoding, true);
 535    }
 536   
 537   
 538  80 private static String encodeWord(String word, String charset, String encoding, boolean encodingWord) throws UnsupportedEncodingException {
 539   
 540    // figure out what we need to encode this.
 541  80 String encoder = ASCIIUtil.getTextTransferEncoding(word);
 542    // all ascii? We can return this directly,
 543  80 if (encoder.equals("7bit")) {
 544  69 return word;
 545    }
 546   
 547    // if not given a charset, use the default.
 548  11 if (charset == null) {
 549  2 charset = getDefaultMIMECharset();
 550    }
 551   
 552    // sort out the encoder. If not explicitly given, use the best guess we've already established.
 553  11 if (encoding != null) {
 554  4 if (encoding.equalsIgnoreCase("B")) {
 555  2 encoder = "base64";
 556    }
 557  2 else if (encoding.equalsIgnoreCase("Q")) {
 558  2 encoder = "quoted-printable";
 559    }
 560    else {
 561  0 throw new UnsupportedEncodingException("Unknown transfer encoding: " + encoding);
 562    }
 563    }
 564   
 565  11 try {
 566    // get the string bytes in the correct source charset
 567  11 InputStream in = new ByteArrayInputStream(word.getBytes( javaCharset(charset)));
 568  11 ByteArrayOutputStream out = new ByteArrayOutputStream();
 569   
 570  11 if (encoder.equals("base64")) {
 571  2 Base64Encoder dataEncoder = new Base64Encoder();
 572  2 dataEncoder.encodeWord(in, charset, out, SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false));
 573    }
 574    else {
 575  9 QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder();
 576  9 dataEncoder.encodeWord(in, charset, encodingWord ? QP_WORD_SPECIALS : QP_TEXT_SPECIALS, out, SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false));
 577    }
 578   
 579  11 byte[] bytes = out.toByteArray();
 580  11 return new String(bytes);
 581    } catch (IOException e) {
 582  0 throw new UnsupportedEncodingException("Invalid encoding");
 583    }
 584    }
 585   
 586   
 587    /**
 588    * Examine the content of a data source and decide what type
 589    * of transfer encoding should be used. For text streams,
 590    * we'll decided between 7bit, quoted-printable, and base64.
 591    * For binary content types, we'll use either 7bit or base64.
 592    *
 593    * @param handler The DataHandler associated with the content.
 594    *
 595    * @return The string name of an encoding used to transfer the content.
 596    */
 597  4 public static String getEncoding(DataHandler handler) {
 598   
 599   
 600    // if this handler has an associated data source, we can read directly from the
 601    // data source to make this judgment. This is generally MUCH faster than asking the
 602    // DataHandler to write out the data for us.
 603  4 DataSource ds = handler.getDataSource();
 604  4 if (ds != null) {
 605  4 return getEncoding(ds);
 606    }
 607   
 608  0 try {
 609    // get a parser that allows us to make comparisons.
 610  0 ContentType content = new ContentType(ds.getContentType());
 611   
 612    // The only access to the content bytes at this point is by asking the handler to write
 613    // the information out to a stream. We're going to pipe this through a special stream
 614    // that examines the bytes as they go by.
 615  0 ContentCheckingOutputStream checker = new ContentCheckingOutputStream();
 616   
 617  0 handler.writeTo(checker);
 618   
 619    // figure this out based on whether we believe this to be a text type or not.
 620  0 if (content.match("text/*")) {
 621  0 return checker.getTextTransferEncoding();
 622    }
 623    else {
 624  0 return checker.getBinaryTransferEncoding();
 625    }
 626   
 627    } catch (Exception e) {
 628    // any unexpected I/O exceptions we'll force to a "safe" fallback position.
 629  0 return "base64";
 630    }
 631    }
 632   
 633   
 634    /**
 635    * Determine the what transfer encoding should be used for
 636    * data retrieved from a DataSource.
 637    *
 638    * @param source The DataSource for the transmitted data.
 639    *
 640    * @return The string name of the encoding form that should be used for
 641    * the data.
 642    */
 643  4 public static String getEncoding(DataSource source) {
 644  4 InputStream in = null;
 645   
 646  4 try {
 647    // get a parser that allows us to make comparisons.
 648  4 ContentType content = new ContentType(source.getContentType());
 649   
 650    // we're probably going to have to scan the data.
 651  4 in = source.getInputStream();
 652   
 653  4 if (!content.match("text/*")) {
 654    // Not purporting to be a text type? Examine the content to see we might be able to
 655    // at least pretend it is an ascii type.
 656  1 return ASCIIUtil.getBinaryTransferEncoding(in);
 657    }
 658    else {
 659  3 return ASCIIUtil.getTextTransferEncoding(in);
 660    }
 661    } catch (Exception e) {
 662    // this was a problem...not sure what makes sense here, so we'll assume it's binary
 663    // and we need to transfer this using Base64 encoding.
 664  0 return "base64";
 665    } finally {
 666    // make sure we close the stream
 667  4 try {
 668  4 if (in != null) {
 669  4 in.close();
 670    }
 671    } catch (IOException e) {
 672    }
 673    }
 674    }
 675   
 676   
 677    /**
 678    * Quote a "word" value. If the word contains any character from
 679    * the specified "specials" list, this value is returned as a
 680    * quoted strong. Otherwise, it is returned unchanged (an "atom").
 681    *
 682    * @param word The word requiring quoting.
 683    * @param specials The set of special characters that can't appear in an unquoted
 684    * string.
 685    *
 686    * @return The quoted value. This will be unchanged if the word doesn't contain
 687    * any of the designated special characters.
 688    */
 689  29 public static String quote(String word, String specials) {
 690  29 int wordLength = word.length();
 691  29 boolean requiresQuoting = false;
 692    // scan the string looking for problem characters
 693  29 for (int i =0; i < wordLength; i++) {
 694  154 char ch = word.charAt(i);
 695    // special escaped characters require escaping, which also implies quoting.
 696  154 if (escapedChars.indexOf(ch) >= 0) {
 697  4 return quoteAndEscapeString(word);
 698    }
 699    // now check for control characters or the designated special characters.
 700  150 if (ch < 32 || ch >= 127 || specials.indexOf(ch) >= 0) {
 701    // we know this requires quoting, but we still need to scan the entire string to
 702    // see if contains chars that require escaping. Just go ahead and treat it as if it does.
 703  12 return quoteAndEscapeString(word);
 704    }
 705    }
 706  13 return word;
 707    }
 708   
 709    /**
 710    * Take a string and return it as a formatted quoted string, with
 711    * all characters requiring escaping handled properly.
 712    *
 713    * @param word The string to quote.
 714    *
 715    * @return The quoted string.
 716    */
 717  16 private static String quoteAndEscapeString(String word) {
 718  16 int wordLength = word.length();
 719    // allocate at least enough for the string and two quotes plus a reasonable number of escaped chars.
 720  16 StringBuffer buffer = new StringBuffer(wordLength + 10);
 721    // add the leading quote.
 722  16 buffer.append('"');
 723   
 724  16 for (int i = 0; i < wordLength; i++) {
 725  266 char ch = word.charAt(i);
 726    // is this an escaped char?
 727  266 if (escapedChars.indexOf(ch) >= 0) {
 728    // add the escape marker before appending.
 729  4 buffer.append('\\');
 730    }
 731  266 buffer.append(ch);
 732    }
 733    // now the closing quote
 734  16 buffer.append('"');
 735  16 return buffer.toString();
 736    }
 737   
 738    /**
 739    * Translate a MIME standard character set name into the Java
 740    * equivalent.
 741    *
 742    * @param charset The MIME standard name.
 743    *
 744    * @return The Java equivalent for this name.
 745    */
 746  20 public static String javaCharset(String charset) {
 747    // nothing in, nothing out.
 748  20 if (charset == null) {
 749  0 return null;
 750    }
 751   
 752  20 String mappedCharset = (String)mime2java.get(charset.toLowerCase());
 753    // if there is no mapping, then the original name is used. Many of the MIME character set
 754    // names map directly back into Java. The reverse isn't necessarily true.
 755  20 return mappedCharset == null ? charset : mappedCharset;
 756    }
 757   
 758    /**
 759    * Map a Java character set name into the MIME equivalent.
 760    *
 761    * @param charset The java character set name.
 762    *
 763    * @return The MIME standard equivalent for this character set name.
 764    */
 765  2 public static String mimeCharset(String charset) {
 766    // nothing in, nothing out.
 767  2 if (charset == null) {
 768  0 return null;
 769    }
 770   
 771  2 String mappedCharset = (String)java2mime.get(charset.toLowerCase());
 772    // if there is no mapping, then the original name is used. Many of the MIME character set
 773    // names map directly back into Java. The reverse isn't necessarily true.
 774  2 return mappedCharset == null ? charset : mappedCharset;
 775    }
 776   
 777   
 778    /**
 779    * Get the default character set to use, in Java name format.
 780    * This either be the value set with the mail.mime.charset
 781    * system property or obtained from the file.encoding system
 782    * property. If neither of these is set, we fall back to
 783    * 8859_1 (basically US-ASCII).
 784    *
 785    * @return The character string value of the default character set.
 786    */
 787  0 public static String getDefaultJavaCharset() {
 788  0 String charset = SessionUtil.getProperty("mail.mime.charset");
 789  0 if (charset != null) {
 790  0 return javaCharset(charset);
 791    }
 792  0 return SessionUtil.getProperty("file.encoding", "8859_1");
 793    }
 794   
 795    /**
 796    * Get the default character set to use, in MIME name format.
 797    * This either be the value set with the mail.mime.charset
 798    * system property or obtained from the file.encoding system
 799    * property. If neither of these is set, we fall back to
 800    * 8859_1 (basically US-ASCII).
 801    *
 802    * @return The character string value of the default character set.
 803    */
 804  2 static String getDefaultMIMECharset() {
 805    // if the property is specified, this can be used directly.
 806  2 String charset = SessionUtil.getProperty("mail.mime.charset");
 807  2 if (charset != null) {
 808  0 return charset;
 809    }
 810   
 811    // get the Java-defined default and map back to a MIME name.
 812  2 return mimeCharset(SessionUtil.getProperty("file.encoding", "8859_1"));
 813    }
 814   
 815   
 816    /**
 817    * Load the default mapping tables used by the javaCharset()
 818    * and mimeCharset() methods. By default, these tables are
 819    * loaded from the /META-INF/javamail.charset.map file. If
 820    * something goes wrong loading that file, we configure things
 821    * with a default mapping table (which just happens to mimic
 822    * what's in the default mapping file).
 823    */
 824  1 static private void loadCharacterSetMappings() {
 825  1 java2mime = new HashMap();
 826  1 mime2java = new HashMap();
 827   
 828   
 829    // normally, these come from a character map file contained in the jar file.
 830  1 try {
 831  1 InputStream map = javax.mail.internet.MimeUtility.class.getResourceAsStream("/META-INF/javamail.charset.map");
 832   
 833  1 if (map != null) {
 834    // get a reader for this so we can load.
 835  1 BufferedReader reader = new BufferedReader(new InputStreamReader(map));
 836   
 837  1 readMappings(reader, java2mime);
 838  1 readMappings(reader, mime2java);
 839    }
 840    } catch (Exception e) {
 841    }
 842   
 843    // if any sort of error occurred reading the preferred file version, we could end up with empty
 844    // mapping tables. This could cause all sorts of difficulty, so ensure they are populated with at
 845    // least a reasonable set of defaults.
 846   
 847    // these mappings echo what's in the default file.
 848  1 if (java2mime.isEmpty()) {
 849  0 java2mime.put("8859_1", "ISO-8859-1");
 850  0 java2mime.put("iso8859_1", "ISO-8859-1");
 851  0 java2mime.put("iso8859-1", "ISO-8859-1");
 852   
 853  0 java2mime.put("8859_2", "ISO-8859-2");
 854  0 java2mime.put("iso8859_2", "ISO-8859-2");
 855  0 java2mime.put("iso8859-2", "ISO-8859-2");
 856   
 857  0 java2mime.put("8859_3", "ISO-8859-3");
 858  0 java2mime.put("iso8859_3", "ISO-8859-3");
 859  0 java2mime.put("iso8859-3", "ISO-8859-3");
 860   
 861  0 java2mime.put("8859_4", "ISO-8859-4");
 862  0 java2mime.put("iso8859_4", "ISO-8859-4");
 863  0 java2mime.put("iso8859-4", "ISO-8859-4");
 864   
 865  0 java2mime.put("8859_5", "ISO-8859-5");
 866  0 java2mime.put("iso8859_5", "ISO-8859-5");
 867  0 java2mime.put("iso8859-5", "ISO-8859-5");
 868   
 869  0 java2mime.put ("8859_6", "ISO-8859-6");
 870  0 java2mime.put("iso8859_6", "ISO-8859-6");
 871  0 java2mime.put("iso8859-6", "ISO-8859-6");
 872   
 873  0 java2mime.put("8859_7", "ISO-8859-7");
 874  0 java2mime.put("iso8859_7", "ISO-8859-7");
 875  0 java2mime.put("iso8859-7", "ISO-8859-7");
 876   
 877  0 java2mime.put("8859_8", "ISO-8859-8");
 878  0 java2mime.put("iso8859_8", "ISO-8859-8");
 879  0 java2mime.put("iso8859-8", "ISO-8859-8");
 880   
 881  0 java2mime.put("8859_9", "ISO-8859-9");
 882  0 java2mime.put("iso8859_9", "ISO-8859-9");
 883  0 java2mime.put("iso8859-9", "ISO-8859-9");
 884   
 885  0 java2mime.put("sjis", "Shift_JIS");
 886  0 java2mime.put ("jis", "ISO-2022-JP");
 887  0 java2mime.put("iso2022jp", "ISO-2022-JP");
 888  0 java2mime.put("euc_jp", "euc-jp");
 889  0 java2mime.put("koi8_r", "koi8-r");
 890  0 java2mime.put("euc_cn", "euc-cn");
 891  0 java2mime.put("euc_tw", "euc-tw");
 892  0 java2mime.put("euc_kr", "euc-kr");
 893    }
 894   
 895  1 if (mime2java.isEmpty ()) {
 896  0 mime2java.put("iso-2022-cn", "ISO2022CN");
 897  0 mime2java.put("iso-2022-kr", "ISO2022KR");
 898  0 mime2java.put("utf-8", "UTF8");
 899  0 mime2java.put("utf8", "UTF8");
 900  0 mime2java.put("ja_jp.iso2022-7", "ISO2022JP");
 901  0 mime2java.put("ja_jp.eucjp", "EUCJIS");
 902  0 mime2java.put ("euc-kr", "KSC5601");
 903  0 mime2java.put("euckr", "KSC5601");
 904  0 mime2java.put("us-ascii", "ISO-8859-1");
 905  0 mime2java.put("x-us-ascii", "ISO-8859-1");
 906    }
 907    }
 908   
 909   
 910    /**
 911    * Read a section of a character map table and populate the
 912    * target mapping table with the information. The table end
 913    * is marked by a line starting with "--" and also ending with
 914    * "--". Blank lines and comment lines (beginning with '#') are
 915    * ignored.
 916    *
 917    * @param reader The source of the file information.
 918    * @param table The mapping table used to store the information.
 919    */
 920  2 static private void readMappings(BufferedReader reader, Map table) throws IOException {
 921    // process lines to the EOF or the end of table marker.
 922  2 while (true) {
 923  56 String line = reader.readLine();
 924    // no line returned is an EOF
 925  56 if (line == null) {
 926  1 return;
 927    }
 928   
 929    // trim so we're not messed up by trailing blanks
 930  55 line = line.trim();
 931   
 932  55 if (line.length() == 0 || line.startsWith("#")) {
 933  17 continue;
 934    }
 935   
 936    // stop processing if this is the end-of-table marker.
 937  38 if (line.startsWith("--") && line.endsWith("--")) {
 938  1 return;
 939    }
 940   
 941    // we allow either blanks or tabs as token delimiters.
 942  37 StringTokenizer tokenizer = new StringTokenizer(line, " \t");
 943   
 944  37 try {
 945  37 String from = tokenizer.nextToken().toLowerCase();
 946  37 String to = tokenizer.nextToken();
 947   
 948  37 table.put(from, to);
 949    } catch (NoSuchElementException e) {
 950    // just ignore the line if invalid.
 951    }
 952    }
 953    }
 954   
 955   
 956    }
 957   
 958   
 959    /**
 960    * Utility class for examining content information written out
 961    * by a DataHandler object. This stream gathers statistics on
 962    * the stream so it can make transfer encoding determinations.
 963    */
 964    class ContentCheckingOutputStream extends OutputStream {
 965    private int asciiChars = 0;
 966    private int nonAsciiChars = 0;
 967    private boolean containsLongLines = false;
 968    private boolean containsMalformedEOL = false;
 969    private int previousChar = 0;
 970    private int span = 0;
 971   
 972  0 ContentCheckingOutputStream() {
 973    }
 974   
 975  0 public void write(byte[] data) throws IOException {
 976  0 write(data, 0, data.length);
 977    }
 978   
 979  0 public void write(byte[] data, int offset, int length) throws IOException {
 980  0 for (int i = 0; i < length; i++) {
 981  0 write(data[offset + i]);
 982    }
 983    }
 984   
 985  0 public void write(int ch) {
 986    // we found a linebreak. Reset the line length counters on either one. We don't
 987    // really need to validate here.
 988  0 if (ch == '\n' || ch == '\r') {
 989    // we found a newline, this is only valid if the previous char was the '\r'
 990  0 if (ch == '\n') {
 991    // malformed linebreak? force this to base64 encoding.
 992  0 if (previousChar != '\r') {
 993  0 containsMalformedEOL = true;
 994    }
 995    }
 996    // hit a line end, reset our line length counter
 997  0 span = 0;
 998    }
 999    else {
 1000  0 span++;
 1001    // the text has long lines, we can't transfer this as unencoded text.
 1002  0 if (span > 998) {
 1003  0 containsLongLines = true;
 1004    }
 1005   
 1006    // non-ascii character, we have to transfer this in binary.
 1007  0 if (!ASCIIUtil.isAscii(ch)) {
 1008  0 nonAsciiChars++;
 1009    }
 1010    else {
 1011  0 asciiChars++;
 1012    }
 1013    }
 1014  0 previousChar = ch;
 1015    }
 1016   
 1017   
 1018  0 public String getBinaryTransferEncoding() {
 1019  0 if (nonAsciiChars != 0 || containsLongLines || containsMalformedEOL) {
 1020  0 return "base64";
 1021    }
 1022    else {
 1023  0 return "7bit";
 1024    }
 1025    }
 1026   
 1027  0 public String getTextTransferEncoding() {
 1028    // looking good so far, only valid chars here.
 1029  0 if (nonAsciiChars == 0) {
 1030    // does this contain long text lines? We need to use a Q-P encoding which will
 1031    // be only slightly longer, but handles folding the longer lines.
 1032  0 if (containsLongLines) {
 1033  0 return "quoted-printable";
 1034    }
 1035    else {
 1036    // ideal! Easiest one to handle.
 1037  0 return "7bit";
 1038    }
 1039    }
 1040    else {
 1041    // mostly characters requiring encoding? Base64 is our best bet.
 1042  0 if (nonAsciiChars > asciiChars) {
 1043  0 return "base64";
 1044    }
 1045    else {
 1046    // Q-P encoding will use fewer bytes than the full Base64.
 1047  0 return "quoted-printable";
 1048    }
 1049    }
 1050    }
 1051    }