Clover coverage report - Maven Clover report
Coverage timestamp: Sun Aug 20 2006 04:01:44 PDT
file stats: LOC: 1,275   Methods: 30
NCLOC: 737   Classes: 2
 
 Source file Conditionals Statements Methods TOTAL
MimeUtility.java 48.3% 50.8% 70% 50.8%
coverage coverage
 1    /**
 2    *
 3    * Copyright 2003-2006 The Apache Software Foundation
 4    *
 5    * Licensed under the Apache License, Version 2.0 (the "License");
 6    * you may not use this file except in compliance with the License.
 7    * You may obtain a copy of the License at
 8    *
 9    * http://www.apache.org/licenses/LICENSE-2.0
 10    *
 11    * Unless required by applicable law or agreed to in writing, software
 12    * distributed under the License is distributed on an "AS IS" BASIS,
 13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14    * See the License for the specific language governing permissions and
 15    * limitations under the License.
 16    */
 17   
 18    package javax.mail.internet;
 19   
 20    import java.io.BufferedInputStream;
 21    import java.io.BufferedReader;
 22    import java.io.ByteArrayInputStream;
 23    import java.io.ByteArrayOutputStream;
 24    import java.io.IOException;
 25    import java.io.InputStream;
 26    import java.io.InputStreamReader;
 27    import java.io.OutputStream;
 28    import java.io.UnsupportedEncodingException;
 29    import java.util.HashMap;
 30    import java.util.Map;
 31    import java.util.NoSuchElementException;
 32    import java.util.StringTokenizer;
 33   
 34    import javax.activation.DataHandler;
 35    import javax.activation.DataSource;
 36    import javax.mail.MessagingException;
 37   
 38    import org.apache.geronimo.mail.util.ASCIIUtil;
 39    import org.apache.geronimo.mail.util.Base64;
 40    import org.apache.geronimo.mail.util.Base64DecoderStream;
 41    import org.apache.geronimo.mail.util.Base64Encoder;
 42    import org.apache.geronimo.mail.util.Base64EncoderStream;
 43    import org.apache.geronimo.mail.util.QuotedPrintableDecoderStream;
 44    import org.apache.geronimo.mail.util.QuotedPrintableEncoderStream;
 45    import org.apache.geronimo.mail.util.QuotedPrintableEncoder;
 46    import org.apache.geronimo.mail.util.QuotedPrintable;
 47    import org.apache.geronimo.mail.util.SessionUtil;
 48    import org.apache.geronimo.mail.util.UUDecoderStream;
 49    import org.apache.geronimo.mail.util.UUEncoderStream;
 50   
 51    // Supported transfer encodings are "base64", "quoted-printable", "7bit", "8bit" and "binary".
 52    // In addition, "uuencode" (also accepted as "x-uuencode" and "x-uue") is supported.
 53   
 54    /**
 55    * @version $Rev: 421852 $ $Date: 2006-07-14 03:02:19 -0700 (Fri, 14 Jul 2006) $
 56    */
 57    public class MimeUtility {
 58   
 59    private static final String MIME_FOLDENCODEDWORDS = "mail.mime.foldencodedwords";
 60    private static final String MIME_DECODE_TEXT_STRICT = "mail.mime.decodetext.strict";
 61    private static final String MIME_FOLDTEXT = "mail.mime.foldtext";
 62    private static final int FOLD_THRESHOLD = 76;
 63   
 64  0 private MimeUtility() {
 65    }
 66   
 67    public static final int ALL = -1;
 68   
 69    private static String defaultJavaCharset;
 70    private static String escapedChars = "\"\\\r\n";
 71    private static String linearWhiteSpace = " \t\r\n";
 72   
 73    private static String QP_WORD_SPECIALS = "=_?\"#$%&'(),.:;<>@[\\]^`{|}~";
 74    private static String QP_TEXT_SPECIALS = "=_?";
 75   
 76    // the javamail spec includes the ability to map java encoding names to MIME-specified names. Normally,
 77    // these values are loaded from a character mapping file.
 78    private static Map java2mime;
 79    private static Map mime2java;
 80   
 81    static {
 82    // we need to load the mapping tables used by javaCharset() and mimeCharset().
 83  1 loadCharacterSetMappings();
 84    }
 85   
 86  18 public static InputStream decode(InputStream in, String encoding) throws MessagingException {
 87  18 encoding = encoding.toLowerCase();
 88   
 89    // some encodies are just pass-throughs, with no real decoding.
 90  18 if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
 91  6 return in;
 92    }
 93  12 else if (encoding.equals("base64")) {
 94  4 return new Base64DecoderStream(in);
 95    }
 96    // UUEncode is known by a couple historical extension names too.
 97  8 else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
 98  5 return new UUDecoderStream(in);
 99    }
 100  3 else if (encoding.equals("quoted-printable")) {
 101  3 return new QuotedPrintableDecoderStream(in);
 102    }
 103    else {
 104  0 throw new MessagingException("Unknown encoding " + encoding);
 105    }
 106    }
 107   
 108    /**
 109    * Decode a string of text obtained from a mail header into
 110    * its proper form. The text generally will consist of a
 111    * string of tokens, some of which may be RFC 2047
 112    * encoded-words (base64 or quoted-printable).
 113    *
 114    * @param text The text to decode.
 115    *
 116    * @return The decoded text string.
 117    * @exception UnsupportedEncodingException
 118    */
 119  9 public static String decodeText(String text) throws UnsupportedEncodingException {
 120    // if the text contains any encoded tokens, those tokens will be marked with "=?". If the
 121    // source string doesn't contain that sequent, no decoding is required.
 122  9 if (text.indexOf("=?") < 0) {
 123  3 return text;
 124    }
 125   
 126    // we have two sets of rules we can apply.
 127  6 if (!SessionUtil.getBooleanProperty(MIME_DECODE_TEXT_STRICT, true)) {
 128  0 return decodeTextNonStrict(text);
 129    }
 130   
 131  6 int offset = 0;
 132  6 int endOffset = text.length();
 133   
 134  6 int startWhiteSpace = -1;
 135  6 int endWhiteSpace = -1;
 136   
 137  6 StringBuffer decodedText = new StringBuffer(text.length());
 138   
 139  6 boolean previousTokenEncoded = false;
 140   
 141  6 while (offset < endOffset) {
 142  6 char ch = text.charAt(offset);
 143   
 144    // is this a whitespace character?
 145  6 if (linearWhiteSpace.indexOf(ch) != -1) {
 146  0 startWhiteSpace = offset;
 147  0 while (offset < endOffset) {
 148    // step over the white space characters.
 149  0 ch = text.charAt(offset);
 150  0 if (linearWhiteSpace.indexOf(ch) != -1) {
 151  0 offset++;
 152    }
 153    else {
 154    // record the location of the first non lwsp and drop down to process the
 155    // token characters.
 156  0 endWhiteSpace = offset;
 157  0 break;
 158    }
 159    }
 160    }
 161    else {
 162    // we have a word token. We need to scan over the word and then try to parse it.
 163  6 int wordStart = offset;
 164   
 165  6 while (offset < endOffset) {
 166    // step over the white space characters.
 167  201 ch = text.charAt(offset);
 168  201 if (linearWhiteSpace.indexOf(ch) == -1) {
 169  201 offset++;
 170    }
 171    else {
 172  0 break;
 173    }
 174   
 175    //NB: Trailing whitespace on these header strings will just be discarded.
 176    }
 177    // pull out the word token.
 178  6 String word = text.substring(wordStart, offset);
 179    // is the token encoded? decode the word
 180  6 if (word.startsWith("=?")) {
 181  6 try {
 182    // if this gives a parsing failure, treat it like a non-encoded word.
 183  6 String decodedWord = decodeWord(word);
 184   
 185    // are any whitespace characters significant? Append 'em if we've got 'em.
 186  6 if (!previousTokenEncoded) {
 187  6 if (startWhiteSpace != -1) {
 188  0 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
 189  0 startWhiteSpace = -1;
 190    }
 191    }
 192    // this is definitely a decoded token.
 193  6 previousTokenEncoded = true;
 194    // and add this to the text.
 195  6 decodedText.append(decodedWord);
 196    // we continue parsing from here...we allow parsing errors to fall through
 197    // and get handled as normal text.
 198  6 continue;
 199   
 200    } catch (ParseException e) {
 201    }
 202    }
 203    // this is a normal token, so it doesn't matter what the previous token was. Add the white space
 204    // if we have it.
 205  0 if (startWhiteSpace != -1) {
 206  0 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
 207  0 startWhiteSpace = -1;
 208    }
 209    // this is not a decoded token.
 210  0 previousTokenEncoded = false;
 211  0 decodedText.append(word);
 212    }
 213    }
 214   
 215  6 return decodedText.toString();
 216    }
 217   
 218   
 219    /**
 220    * Decode a string of text obtained from a mail header into
 221    * it's proper form. The text generally will consist of a
 222    * string of tokens, some of which may be encoded using
 223    * base64 encoding. This is for non-strict decoded for mailers that
 224    * violate the RFC 2047 restriction that decoded tokens must be delimited
 225    * by linear white space. This will scan tokens looking for inner tokens
 226    * enclosed in "=?" -- "?=" pairs.
 227    *
 228    * @param text The text to decode.
 229    *
 230    * @return The decoded test string.
 231    * @exception UnsupportedEncodingException
 232    */
 233  0 private static String decodeTextNonStrict(String text) throws UnsupportedEncodingException {
 234  0 int offset = 0;
 235  0 int endOffset = text.length();
 236   
 237  0 int startWhiteSpace = -1;
 238  0 int endWhiteSpace = -1;
 239   
 240  0 StringBuffer decodedText = new StringBuffer(text.length());
 241   
 242  0 boolean previousTokenEncoded = false;
 243   
 244  0 while (offset < endOffset) {
 245  0 char ch = text.charAt(offset);
 246   
 247    // is this a whitespace character?
 248  0 if (linearWhiteSpace.indexOf(ch) != -1) {
 249  0 startWhiteSpace = offset;
 250  0 while (offset < endOffset) {
 251    // step over the white space characters.
 252  0 ch = text.charAt(offset);
 253  0 if (linearWhiteSpace.indexOf(ch) != -1) {
 254  0 offset++;
 255    }
 256    else {
 257    // record the location of the first non lwsp and drop down to process the
 258    // token characters.
 259  0 endWhiteSpace = offset;
 260  0 break;
 261    }
 262    }
 263    }
 264    else {
 265    // we're at the start of a word token. We potentially need to break this up into subtokens
 266  0 int wordStart = offset;
 267   
 268  0 while (offset < endOffset) {
 269    // step over the white space characters.
 270  0 ch = text.charAt(offset);
 271  0 if (linearWhiteSpace.indexOf(ch) == -1) {
 272  0 offset++;
 273    }
 274    else {
 275  0 break;
 276    }
 277   
 278    //NB: Trailing whitespace on these header strings will just be discarded.
 279    }
 280    // pull out the word token.
 281  0 String word = text.substring(wordStart, offset);
 282   
 283  0 int decodeStart = 0;
 284   
 285    // now scan and process each of the bits within here.
 286  0 while (decodeStart < word.length()) {
 287  0 int tokenStart = word.indexOf("=?", decodeStart);
 288  0 if (tokenStart == -1) {
 289    // this is a normal token, so it doesn't matter what the previous token was. Add the white space
 290    // if we have it.
 291  0 if (startWhiteSpace != -1) {
 292  0 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
 293  0 startWhiteSpace = -1;
 294    }
 295    // this is not a decoded token.
 296  0 previousTokenEncoded = false;
 297  0 decodedText.append(word.substring(decodeStart));
 298    // we're finished.
 299  0 break;
 300    }
 301    // we have something to process
 302    else {
 303    // we might have a normal token preceeding this.
 304  0 if (tokenStart != decodeStart) {
 305    // this is a normal token, so it doesn't matter what the previous token was. Add the white space
 306    // if we have it.
 307  0 if (startWhiteSpace != -1) {
 308  0 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
 309  0 startWhiteSpace = -1;
 310    }
 311    // this is not a decoded token.
 312  0 previousTokenEncoded = false;
 313  0 decodedText.append(word.substring(decodeStart, tokenStart));
 314    }
 315   
 316    // now find the end marker.
 317  0 int tokenEnd = word.indexOf("?=", tokenStart);
 318    // sigh, an invalid token. Treat this as plain text.
 319  0 if (tokenEnd == -1) {
 320    // this is a normal token, so it doesn't matter what the previous token was. Add the white space
 321    // if we have it.
 322  0 if (startWhiteSpace != -1) {
 323  0 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
 324  0 startWhiteSpace = -1;
 325    }
 326    // this is not a decoded token.
 327  0 previousTokenEncoded = false;
 328  0 decodedText.append(word.substring(tokenStart));
 329    // we're finished.
 330  0 break;
 331    }
 332    else {
 333    // update our ticker
 334  0 decodeStart = tokenEnd + 2;
 335   
 336  0 String token = word.substring(tokenStart, tokenEnd);
 337  0 try {
 338    // if this gives a parsing failure, treat it like a non-encoded word.
 339  0 String decodedWord = decodeWord(token);
 340   
 341    // are any whitespace characters significant? Append 'em if we've got 'em.
 342  0 if (!previousTokenEncoded) {
 343  0 if (startWhiteSpace != -1) {
 344  0 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
 345  0 startWhiteSpace = -1;
 346    }
 347    }
 348    // this is definitely a decoded token.
 349  0 previousTokenEncoded = true;
 350    // and add this to the text.
 351  0 decodedText.append(decodedWord);
 352    // we continue parsing from here...we allow parsing errors to fall through
 353    // and get handled as normal text.
 354  0 continue;
 355   
 356    } catch (ParseException e) {
 357    }
 358    // this is a normal token, so it doesn't matter what the previous token was. Add the white space
 359    // if we have it.
 360  0 if (startWhiteSpace != -1) {
 361  0 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
 362  0 startWhiteSpace = -1;
 363    }
 364    // this is not a decoded token.
 365  0 previousTokenEncoded = false;
 366  0 decodedText.append(token);
 367    }
 368    }
 369    }
 370    }
 371    }
 372   
 373  0 return decodedText.toString();
 374    }
 375   
 376    /**
 377    * Parse a string using the RFC 2047 rules for an "encoded-word"
 378    * type. This encoding has the syntax:
 379    *
 380    * encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
 381    *
 382    * @param word The possibly encoded word value.
 383    *
 384    * @return The decoded word.
 385    * @exception ParseException
 386    * @exception UnsupportedEncodingException
 387    */
 388  9 public static String decodeWord(String word) throws ParseException, UnsupportedEncodingException {
 389    // encoded words start with the characters "=?". If this not an encoded word, we throw a
 390    // ParseException for the caller.
 391   
 392  9 if (!word.startsWith("=?")) {
 393  0 throw new ParseException("Invalid RFC 2047 encoded-word: " + word);
 394    }
 395   
 396  9 int charsetPos = word.indexOf('?', 2);
 397  9 if (charsetPos == -1) {
 398  0 throw new ParseException("Missing charset in RFC 2047 encoded-word: " + word);
 399    }
 400   
 401    // pull out the character set information (this is the MIME name at this point).
 402  9 String charset = word.substring(2, charsetPos).toLowerCase();
 403   
 404    // now pull out the encoding token the same way.
 405  9 int encodingPos = word.indexOf('?', charsetPos + 1);
 406  9 if (encodingPos == -1) {
 407  0 throw new ParseException("Missing encoding in RFC 2047 encoded-word: " + word);
 408    }
 409   
 410  9 String encoding = word.substring(charsetPos + 1, encodingPos);
 411   
 412    // and finally the encoded text.
 413  9 int encodedTextPos = word.indexOf("?=", encodingPos + 1);
 414  9 if (encodedTextPos == -1) {
 415  0 throw new ParseException("Missing encoded text in RFC 2047 encoded-word: " + word);
 416    }
 417   
 418  9 String encodedText = word.substring(encodingPos + 1, encodedTextPos);
 419   
 420    // seems a bit silly to encode a null string, but easy to deal with.
 421  9 if (encodedText.length() == 0) {
 422  0 return "";
 423    }
 424   
 425  9 try {
 426    // the decoder writes directly to an output stream.
 427  9 ByteArrayOutputStream out = new ByteArrayOutputStream(encodedText.length());
 428   
 429  9 byte[] encodedData = encodedText.getBytes("US-ASCII");
 430   
 431    // Base64 encoded?
 432  9 if (encoding.equals("B")) {
 433  2 Base64.decode(encodedData, out);
 434    }
 435    // maybe quoted printable.
 436  7 else if (encoding.equals("Q")) {
 437  7 QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder();
 438  7 dataEncoder.decodeWord(encodedData, out);
 439    }
 440    else {
 441  0 throw new UnsupportedEncodingException("Unknown RFC 2047 encoding: " + encoding);
 442    }
 443    // get the decoded byte data and convert into a string.
 444  9 byte[] decodedData = out.toByteArray();
 445  9 return new String(decodedData, javaCharset(charset));
 446    } catch (IOException e) {
 447  0 throw new UnsupportedEncodingException("Invalid RFC 2047 encoding");
 448    }
 449   
 450    }
 451   
 452    /**
 453    * Wrap an encoder around a given output stream.
 454    *
 455    * @param out The output stream to wrap.
 456    * @param encoding The name of the encoding.
 457    *
 458    * @return A instance of FilterOutputStream that manages on the fly
 459    * encoding for the requested encoding type.
 460    * @exception MessagingException
 461    */
 462  29 public static OutputStream encode(OutputStream out, String encoding) throws MessagingException {
 463    // no encoding specified, so assume it goes out unchanged.
 464  29 if (encoding == null) {
 465  6 return out;
 466    }
 467   
 468  23 encoding = encoding.toLowerCase();
 469   
 470    // some encodies are just pass-throughs, with no real decoding.
 471  23 if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
 472  11 return out;
 473    }
 474  12 else if (encoding.equals("base64")) {
 475  4 return new Base64EncoderStream(out);
 476    }
 477    // UUEncode is known by a couple historical extension names too.
 478  8 else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
 479  5 return new UUEncoderStream(out);
 480    }
 481  3 else if (encoding.equals("quoted-printable")) {
 482  3 return new QuotedPrintableEncoderStream(out);
 483    }
 484    else {
 485  0 throw new MessagingException("Unknown encoding " + encoding);
 486    }
 487    }
 488   
 489    /**
 490    * Wrap an encoder around a given output stream.
 491    *
 492    * @param out The output stream to wrap.
 493    * @param encoding The name of the encoding.
 494    * @param filename The filename of the data being sent (only used for UUEncode).
 495    *
 496    * @return A instance of FilterOutputStream that manages on the fly
 497    * encoding for the requested encoding type.
 498    * @exception MessagingException
 499    */
 500  0 public static OutputStream encode(OutputStream out, String encoding, String filename) throws MessagingException {
 501  0 encoding = encoding.toLowerCase();
 502   
 503    // some encodies are just pass-throughs, with no real decoding.
 504  0 if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
 505  0 return out;
 506    }
 507  0 else if (encoding.equals("base64")) {
 508  0 return new Base64EncoderStream(out);
 509    }
 510    // UUEncode is known by a couple historical extension names too.
 511  0 else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
 512  0 return new UUEncoderStream(out, filename);
 513    }
 514  0 else if (encoding.equals("quoted-printable")) {
 515  0 return new QuotedPrintableEncoderStream(out);
 516    }
 517    else {
 518  0 throw new MessagingException("Unknown encoding " + encoding);
 519    }
 520    }
 521   
 522   
 523  1 public static String encodeText(String word) throws UnsupportedEncodingException {
 524  1 return encodeText(word, null, null);
 525    }
 526   
 527  14 public static String encodeText(String word, String charset, String encoding) throws UnsupportedEncodingException {
 528  14 return encodeWord(word, charset, encoding, false);
 529    }
 530   
 531  66 public static String encodeWord(String word) throws UnsupportedEncodingException {
 532  66 return encodeWord(word, null, null);
 533    }
 534   
 535  68 public static String encodeWord(String word, String charset, String encoding) throws UnsupportedEncodingException {
 536  68 return encodeWord(word, charset, encoding, true);
 537    }
 538   
 539   
 540  82 private static String encodeWord(String word, String charset, String encoding, boolean encodingWord) throws UnsupportedEncodingException {
 541   
 542    // figure out what we need to encode this.
 543  82 String encoder = ASCIIUtil.getTextTransferEncoding(word);
 544    // all ascii? We can return this directly,
 545  82 if (encoder.equals("7bit")) {
 546  71 return word;
 547    }
 548   
 549    // if not given a charset, use the default.
 550  11 if (charset == null) {
 551  2 charset = getDefaultMIMECharset();
 552    }
 553   
 554    // sort out the encoder. If not explicitly given, use the best guess we've already established.
 555  11 if (encoding != null) {
 556  4 if (encoding.equalsIgnoreCase("B")) {
 557  2 encoder = "base64";
 558    }
 559  2 else if (encoding.equalsIgnoreCase("Q")) {
 560  2 encoder = "quoted-printable";
 561    }
 562    else {
 563  0 throw new UnsupportedEncodingException("Unknown transfer encoding: " + encoding);
 564    }
 565    }
 566   
 567  11 try {
 568    // get the string bytes in the correct source charset
 569  11 InputStream in = new ByteArrayInputStream(word.getBytes( javaCharset(charset)));
 570  11 ByteArrayOutputStream out = new ByteArrayOutputStream();
 571   
 572  11 if (encoder.equals("base64")) {
 573  2 Base64Encoder dataEncoder = new Base64Encoder();
 574  2 dataEncoder.encodeWord(in, charset, out, SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false));
 575    }
 576    else {
 577  9 QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder();
 578  9 dataEncoder.encodeWord(in, charset, encodingWord ? QP_WORD_SPECIALS : QP_TEXT_SPECIALS, out, SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false));
 579    }
 580   
 581  11 byte[] bytes = out.toByteArray();
 582  11 return new String(bytes);
 583    } catch (IOException e) {
 584  0 throw new UnsupportedEncodingException("Invalid encoding");
 585    }
 586    }
 587   
 588   
 589    /**
 590    * Examine the content of a data source and decide what type
 591    * of transfer encoding should be used. For text streams,
 592    * we'll decided between 7bit, quoted-printable, and base64.
 593    * For binary content types, we'll use either 7bit or base64.
 594    *
 595    * @param handler The DataHandler associated with the content.
 596    *
 597    * @return The string name of an encoding used to transfer the content.
 598    */
 599  11 public static String getEncoding(DataHandler handler) {
 600   
 601   
 602    // if this handler has an associated data source, we can read directly from the
 603    // data source to make this judgment. This is generally MUCH faster than asking the
 604    // DataHandler to write out the data for us.
 605  11 DataSource ds = handler.getDataSource();
 606  11 if (ds != null) {
 607  11 return getEncoding(ds);
 608    }
 609   
 610  0 try {
 611    // get a parser that allows us to make comparisons.
 612  0 ContentType content = new ContentType(ds.getContentType());
 613   
 614    // The only access to the content bytes at this point is by asking the handler to write
 615    // the information out to a stream. We're going to pipe this through a special stream
 616    // that examines the bytes as they go by.
 617  0 ContentCheckingOutputStream checker = new ContentCheckingOutputStream();
 618   
 619  0 handler.writeTo(checker);
 620   
 621    // figure this out based on whether we believe this to be a text type or not.
 622  0 if (content.match("text/*")) {
 623  0 return checker.getTextTransferEncoding();
 624    }
 625    else {
 626  0 return checker.getBinaryTransferEncoding();
 627    }
 628   
 629    } catch (Exception e) {
 630    // any unexpected I/O exceptions we'll force to a "safe" fallback position.
 631  0 return "base64";
 632    }
 633    }
 634   
 635   
 636    /**
 637    * Determine the what transfer encoding should be used for
 638    * data retrieved from a DataSource.
 639    *
 640    * @param source The DataSource for the transmitted data.
 641    *
 642    * @return The string name of the encoding form that should be used for
 643    * the data.
 644    */
 645  17 public static String getEncoding(DataSource source) {
 646  17 InputStream in = null;
 647   
 648  17 try {
 649    // get a parser that allows us to make comparisons.
 650  17 ContentType content = new ContentType(source.getContentType());
 651   
 652    // we're probably going to have to scan the data.
 653  17 in = source.getInputStream();
 654   
 655  16 if (!content.match("text/*")) {
 656    // Not purporting to be a text type? Examine the content to see we might be able to
 657    // at least pretend it is an ascii type.
 658  6 return ASCIIUtil.getBinaryTransferEncoding(in);
 659    }
 660    else {
 661  10 return ASCIIUtil.getTextTransferEncoding(in);
 662    }
 663    } catch (Exception e) {
 664    // this was a problem...not sure what makes sense here, so we'll assume it's binary
 665    // and we need to transfer this using Base64 encoding.
 666  1 return "base64";
 667    } finally {
 668    // make sure we close the stream
 669  17 try {
 670  17 if (in != null) {
 671  16 in.close();
 672    }
 673    } catch (IOException e) {
 674    }
 675    }
 676    }
 677   
 678   
 679    /**
 680    * Quote a "word" value. If the word contains any character from
 681    * the specified "specials" list, this value is returned as a
 682    * quoted strong. Otherwise, it is returned unchanged (an "atom").
 683    *
 684    * @param word The word requiring quoting.
 685    * @param specials The set of special characters that can't appear in an unquoted
 686    * string.
 687    *
 688    * @return The quoted value. This will be unchanged if the word doesn't contain
 689    * any of the designated special characters.
 690    */
 691  46 public static String quote(String word, String specials) {
 692  46 int wordLength = word.length();
 693  46 boolean requiresQuoting = false;
 694    // scan the string looking for problem characters
 695  46 for (int i =0; i < wordLength; i++) {
 696  351 char ch = word.charAt(i);
 697    // special escaped characters require escaping, which also implies quoting.
 698  351 if (escapedChars.indexOf(ch) >= 0) {
 699  4 return quoteAndEscapeString(word);
 700    }
 701    // now check for control characters or the designated special characters.
 702  347 if (ch < 32 || ch >= 127 || specials.indexOf(ch) >= 0) {
 703    // we know this requires quoting, but we still need to scan the entire string to
 704    // see if contains chars that require escaping. Just go ahead and treat it as if it does.
 705  15 return quoteAndEscapeString(word);
 706    }
 707    }
 708  27 return word;
 709    }
 710   
 711    /**
 712    * Take a string and return it as a formatted quoted string, with
 713    * all characters requiring escaping handled properly.
 714    *
 715    * @param word The string to quote.
 716    *
 717    * @return The quoted string.
 718    */
 719  19 private static String quoteAndEscapeString(String word) {
 720  19 int wordLength = word.length();
 721    // allocate at least enough for the string and two quotes plus a reasonable number of escaped chars.
 722  19 StringBuffer buffer = new StringBuffer(wordLength + 10);
 723    // add the leading quote.
 724  19 buffer.append('"');
 725   
 726  19 for (int i = 0; i < wordLength; i++) {
 727  367 char ch = word.charAt(i);
 728    // is this an escaped char?
 729  367 if (escapedChars.indexOf(ch) >= 0) {
 730    // add the escape marker before appending.
 731  4 buffer.append('\\');
 732    }
 733  367 buffer.append(ch);
 734    }
 735    // now the closing quote
 736  19 buffer.append('"');
 737  19 return buffer.toString();
 738    }
 739   
 740    /**
 741    * Translate a MIME standard character set name into the Java
 742    * equivalent.
 743    *
 744    * @param charset The MIME standard name.
 745    *
 746    * @return The Java equivalent for this name.
 747    */
 748  26 public static String javaCharset(String charset) {
 749    // nothing in, nothing out.
 750  26 if (charset == null) {
 751  0 return null;
 752    }
 753   
 754  26 String mappedCharset = (String)mime2java.get(charset.toLowerCase());
 755    // if there is no mapping, then the original name is used. Many of the MIME character set
 756    // names map directly back into Java. The reverse isn't necessarily true.
 757  26 return mappedCharset == null ? charset : mappedCharset;
 758    }
 759   
 760    /**
 761    * Map a Java character set name into the MIME equivalent.
 762    *
 763    * @param charset The java character set name.
 764    *
 765    * @return The MIME standard equivalent for this character set name.
 766    */
 767  3 public static String mimeCharset(String charset) {
 768    // nothing in, nothing out.
 769  3 if (charset == null) {
 770  0 return null;
 771    }
 772   
 773  3 String mappedCharset = (String)java2mime.get(charset.toLowerCase());
 774    // if there is no mapping, then the original name is used. Many of the MIME character set
 775    // names map directly back into Java. The reverse isn't necessarily true.
 776  3 return mappedCharset == null ? charset : mappedCharset;
 777    }
 778   
 779   
 780    /**
 781    * Get the default character set to use, in Java name format.
 782    * This will either be the value set with the mail.mime.charset
 783    * system property or obtained from the file.encoding system
 784    * property. If neither of these is set, we fall back to
 785    * 8859_1 (ISO-8859-1, a superset of US-ASCII).
 786    *
 787    * @return The character string value of the default character set.
 788    */
 789  3 public static String getDefaultJavaCharset() {
 790  3 String charset = SessionUtil.getProperty("mail.mime.charset");
 791  3 if (charset != null) {
 792  0 return javaCharset(charset);
 793    }
 794  3 return SessionUtil.getProperty("file.encoding", "8859_1");
 795    }
 796   
 797    /**
 798    * Get the default character set to use, in MIME name format.
 799    * This will either be the value set with the mail.mime.charset
 800    * system property or obtained from the file.encoding system
 801    * property. If neither of these is set, we fall back to
 802    * 8859_1 (ISO-8859-1, a superset of US-ASCII).
 803    *
 804    * @return The character string value of the default character set.
 805    */
 806  3 static String getDefaultMIMECharset() {
 807    // if the property is specified, this can be used directly.
 808  3 String charset = SessionUtil.getProperty("mail.mime.charset");
 809  3 if (charset != null) {
 810  0 return charset;
 811    }
 812   
 813    // get the Java-defined default and map back to a MIME name.
 814  3 return mimeCharset(SessionUtil.getProperty("file.encoding", "8859_1"));
 815    }
 816   
 817   
 818    /**
 819    * Load the default mapping tables used by the javaCharset()
 820    * and mimeCharset() methods. By default, these tables are
 821    * loaded from the /META-INF/javamail.charset.map file. If
 822    * something goes wrong loading that file, we configure things
 823    * with a default mapping table (which just happens to mimic
 824    * what's in the default mapping file).
 825    */
 826  1 static private void loadCharacterSetMappings() {
 827  1 java2mime = new HashMap();
 828  1 mime2java = new HashMap();
 829   
 830   
 831    // normally, these come from a character map file contained in the jar file.
 832  1 try {
 833  1 InputStream map = javax.mail.internet.MimeUtility.class.getResourceAsStream("/META-INF/javamail.charset.map");
 834   
 835  1 if (map != null) {
 836    // get a reader for this so we can load.
 837  1 BufferedReader reader = new BufferedReader(new InputStreamReader(map));
 838   
 839  1 readMappings(reader, java2mime);
 840  1 readMappings(reader, mime2java);
 841    }
 842    } catch (Exception e) {
 843    }
 844   
 845    // if any sort of error occurred reading the preferred file version, we could end up with empty
 846    // mapping tables. This could cause all sorts of difficulty, so ensure they are populated with at
 847    // least a reasonable set of defaults.
 848   
 849    // these mappings echo what's in the default file.
 850  1 if (java2mime.isEmpty()) {
 851  0 java2mime.put("8859_1", "ISO-8859-1");
 852  0 java2mime.put("iso8859_1", "ISO-8859-1");
 853  0 java2mime.put("iso8859-1", "ISO-8859-1");
 854   
 855  0 java2mime.put("8859_2", "ISO-8859-2");
 856  0 java2mime.put("iso8859_2", "ISO-8859-2");
 857  0 java2mime.put("iso8859-2", "ISO-8859-2");
 858   
 859  0 java2mime.put("8859_3", "ISO-8859-3");
 860  0 java2mime.put("iso8859_3", "ISO-8859-3");
 861  0 java2mime.put("iso8859-3", "ISO-8859-3");
 862   
 863  0 java2mime.put("8859_4", "ISO-8859-4");
 864  0 java2mime.put("iso8859_4", "ISO-8859-4");
 865  0 java2mime.put("iso8859-4", "ISO-8859-4");
 866   
 867  0 java2mime.put("8859_5", "ISO-8859-5");
 868  0 java2mime.put("iso8859_5", "ISO-8859-5");
 869  0 java2mime.put("iso8859-5", "ISO-8859-5");
 870   
 871  0 java2mime.put ("8859_6", "ISO-8859-6");
 872  0 java2mime.put("iso8859_6", "ISO-8859-6");
 873  0 java2mime.put("iso8859-6", "ISO-8859-6");
 874   
 875  0 java2mime.put("8859_7", "ISO-8859-7");
 876  0 java2mime.put("iso8859_7", "ISO-8859-7");
 877  0 java2mime.put("iso8859-7", "ISO-8859-7");
 878   
 879  0 java2mime.put("8859_8", "ISO-8859-8");
 880  0 java2mime.put("iso8859_8", "ISO-8859-8");
 881  0 java2mime.put("iso8859-8", "ISO-8859-8");
 882   
 883  0 java2mime.put("8859_9", "ISO-8859-9");
 884  0 java2mime.put("iso8859_9", "ISO-8859-9");
 885  0 java2mime.put("iso8859-9", "ISO-8859-9");
 886   
 887  0 java2mime.put("sjis", "Shift_JIS");
 888  0 java2mime.put ("jis", "ISO-2022-JP");
 889  0 java2mime.put("iso2022jp", "ISO-2022-JP");
 890  0 java2mime.put("euc_jp", "euc-jp");
 891  0 java2mime.put("koi8_r", "koi8-r");
 892  0 java2mime.put("euc_cn", "euc-cn");
 893  0 java2mime.put("euc_tw", "euc-tw");
 894  0 java2mime.put("euc_kr", "euc-kr");
 895    }
 896   
 897  1 if (mime2java.isEmpty ()) {
 898  0 mime2java.put("iso-2022-cn", "ISO2022CN");
 899  0 mime2java.put("iso-2022-kr", "ISO2022KR");
 900  0 mime2java.put("utf-8", "UTF8");
 901  0 mime2java.put("utf8", "UTF8");
 902  0 mime2java.put("ja_jp.iso2022-7", "ISO2022JP");
 903  0 mime2java.put("ja_jp.eucjp", "EUCJIS");
 904  0 mime2java.put ("euc-kr", "KSC5601");
 905  0 mime2java.put("euckr", "KSC5601");
 906  0 mime2java.put("us-ascii", "ISO-8859-1");
 907  0 mime2java.put("x-us-ascii", "ISO-8859-1");
 908    }
 909    }
 910   
 911   
 912    /**
 913    * Read a section of a character map table and populate the
 914    * target mapping table with the information. The table end
 915    * is marked by a line starting with "--" and also ending with
 916    * "--". Blank lines and comment lines (beginning with '#') are
 917    * ignored.
 918    *
 919    * @param reader The source of the file information.
 920    * @param table The mapping table used to store the information.
 921    */
 922  2 static private void readMappings(BufferedReader reader, Map table) throws IOException {
 923    // process lines to the EOF or the end of table marker.
 924  2 while (true) {
 925  56 String line = reader.readLine();
 926    // no line returned is an EOF
 927  56 if (line == null) {
 928  1 return;
 929    }
 930   
 931    // trim so we're not messed up by trailing blanks
 932  55 line = line.trim();
 933   
 934  55 if (line.length() == 0 || line.startsWith("#")) {
 935  17 continue;
 936    }
 937   
 938    // stop processing if this is the end-of-table marker.
 939  38 if (line.startsWith("--") && line.endsWith("--")) {
 940  1 return;
 941    }
 942   
 943    // we allow either blanks or tabs as token delimiters.
 944  37 StringTokenizer tokenizer = new StringTokenizer(line, " \t");
 945   
 946  37 try {
 947  37 String from = tokenizer.nextToken().toLowerCase();
 948  37 String to = tokenizer.nextToken();
 949   
 950  37 table.put(from, to);
 951    } catch (NoSuchElementException e) {
 952    // just ignore the line if invalid.
 953    }
 954    }
 955    }
 956   
 957   
 958    /**
 959    * Perform RFC 2047 text folding on a string of text.
 960    *
 961    * @param used The amount of text already "used up" on this line. This is
 962    * typically the length of a message header that this text
 963    * get getting added to.
 964    * @param s The text to fold.
 965    *
 966    * @return The input text, with linebreaks inserted at appropriate fold points.
 967    */
 968  14 public static String fold(int used, String s) {
 969    // if folding is disable, unfolding is also. Return the string unchanged.
 970  14 if (!SessionUtil.getBooleanProperty(MIME_FOLDTEXT, true)) {
 971  0 return s;
 972    }
 973   
 974  14 int end;
 975   
 976    // now we need to strip off any trailing "whitespace", where whitespace is blanks, tabs,
 977    // and line break characters.
 978  14 for (end = s.length() - 1; end >= 0; end--) {
 979  14 int ch = s.charAt(end);
 980  14 if (ch != ' ' && ch != '\t' ) {
 981  14 break;
 982    }
 983    }
 984   
 985    // did we actually find something to remove? Shorten the String to the trimmed length
 986  14 if (end != s.length() - 1) {
 987  0 s = s.substring(0, end + 1);
 988    }
 989   
 990    // does the string as it exists now not require folding? We can just had that back right off.
 991  14 if (s.length() + used <= FOLD_THRESHOLD) {
 992  11 return s;
 993    }
 994   
 995    // get a buffer for the length of the string, plus room for a few line breaks.
 996    // these are soft line breaks, so we generally need more that just the line breaks (an escape +
 997    // CR + LF + leading space on next line);
 998  3 StringBuffer newString = new StringBuffer(s.length() + 8);
 999   
 1000   
 1001    // now keep chopping this down until we've accomplished what we need.
 1002  3 while (used + s.length() > FOLD_THRESHOLD) {
 1003  4 int breakPoint = -1;
 1004  4 char breakChar = 0;
 1005   
 1006    // now scan for the next place where we can break.
 1007  271 for (int i = 0; i < s.length(); i++) {
 1008    // have we passed the fold limit?
 1009  271 if (used + i > FOLD_THRESHOLD) {
 1010    // if we've already seen a blank, then stop now. Otherwise
 1011    // we keep going until we hit a fold point.
 1012  67 if (breakPoint != -1) {
 1013  4 break;
 1014    }
 1015    }
 1016  267 char ch = s.charAt(i);
 1017   
 1018    // a white space character?
 1019  267 if (ch == ' ' || ch == '\t') {
 1020    // this might be a run of white space, so skip over those now.
 1021  35 breakPoint = i;
 1022    // we need to maintain the same character type after the inserted linebreak.
 1023  35 breakChar = ch;
 1024  35 i++;
 1025  35 while (i < s.length()) {
 1026  35 ch = s.charAt(i);
 1027  35 if (ch != ' ' && ch != '\t') {
 1028  35 break;
 1029    }
 1030  0 i++;
 1031    }
 1032    }
 1033    // found an embedded new line. Escape this so that the unfolding process preserves it.
 1034  232 else if (ch == '\n') {
 1035  0 newString.append('\\');
 1036  0 newString.append('\n');
 1037    }
 1038  232 else if (ch == '\r') {
 1039  0 newString.append('\\');
 1040  0 newString.append('\n');
 1041  0 i++;
 1042    // if this is a CRLF pair, add the second char also
 1043  0 if (i < s.length() && s.charAt(i) == '\n') {
 1044  0 newString.append('\r');
 1045    }
 1046    }
 1047   
 1048    }
 1049    // no fold point found, we punt, append the remainder and leave.
 1050  4 if (breakPoint == -1) {
 1051  0 newString.append(s);
 1052  0 return newString.toString();
 1053    }
 1054  4 newString.append(s.substring(0, breakPoint));
 1055  4 newString.append("\r\n");
 1056  4 newString.append(breakChar);
 1057    // chop the string
 1058  4 s = s.substring(breakPoint + 1);
 1059    // start again, and we've used the first char of the limit already with the whitespace char.
 1060  4 used = 1;
 1061    }
 1062   
 1063    // add on the remainder, and return
 1064  3 newString.append(s);
 1065  3 return newString.toString();
 1066    }
 1067   
 1068    /**
 1069    * Unfold a folded string. The unfolding process will remove
 1070    * any line breaks that are not escaped and which are also followed
 1071    * by whitespace characters.
 1072    *
 1073    * @param s The folded string.
 1074    *
 1075    * @return A new string with unfolding rules applied.
 1076    */
 1077  10 public static String unfold(String s) {
 1078    // if folding is disable, unfolding is also. Return the string unchanged.
 1079  10 if (!SessionUtil.getBooleanProperty(MIME_FOLDTEXT, true)) {
 1080  0 return s;
 1081    }
 1082   
 1083    // if there are no line break characters in the string, we can just return this.
 1084  10 if (s.indexOf('\n') < 0 && s.indexOf('\r') < 0) {
 1085  7 return s;
 1086    }
 1087   
 1088    // we need to scan and fix things up.
 1089  3 int length = s.length();
 1090   
 1091  3 StringBuffer newString = new StringBuffer(length);
 1092   
 1093    // scan the entire string
 1094  3 for (int i = 0; i < length; i++) {
 1095  414 char ch = s.charAt(i);
 1096   
 1097    // we have a backslash. In folded strings, escape characters are only processed as such if
 1098    // they preceed line breaks. Otherwise, we leave it be.
 1099  414 if (ch == '\\') {
 1100    // escape at the very end? Just add the character.
 1101  0 if (i == length - 1) {
 1102  0 newString.append(ch);
 1103    }
 1104    else {
 1105  0 int nextChar = s.charAt(i + 1);
 1106   
 1107    // naked newline? Add the new line to the buffer, and skip the escape char.
 1108  0 if (nextChar == '\n') {
 1109  0 newString.append('\n');
 1110  0 i++;
 1111    }
 1112  0 else if (nextChar == '\r') {
 1113    // just the CR left? Add it, removing the escape.
 1114  0 if (i == length - 2 || s.charAt(i + 2) != '\r') {
 1115  0 newString.append('\r');
 1116  0 i++;
 1117    }
 1118    else {
 1119    // toss the escape, add both parts of the CRLF, and skip over two chars.
 1120  0 newString.append('\r');
 1121  0 newString.append('\n');
 1122  0 i += 2;
 1123    }
 1124    }
 1125    else {
 1126    // an escape for another purpose, just copy it over.
 1127  0 newString.append(ch);
 1128    }
 1129    }
 1130    }
 1131    // we have an unescaped line break
 1132  414 else if (ch == '\n' || ch == '\r') {
 1133    // remember the position in case we need to backtrack.
 1134  4 int lineBreak = i;
 1135  4 boolean CRLF = false;
 1136   
 1137  4 if (ch == '\r') {
 1138    // check to see if we need to step over this.
 1139  4 if (i < length - 1 && s.charAt(i + 1) == '\n') {
 1140  4 i++;
 1141    // flag the type so we know what we might need to preserve.
 1142  4 CRLF = true;
 1143    }
 1144    }
 1145   
 1146    // get a temp position scanner.
 1147  4 int scan = i + 1;
 1148   
 1149    // does a blank follow this new line? we need to scrap the new line and reduce the leading blanks
 1150    // down to a single blank.
 1151  4 if (scan < length && s.charAt(scan) == ' ') {
 1152    // add the character
 1153  4 newString.append(' ');
 1154   
 1155    // scan over the rest of the blanks
 1156  4 i = scan + 1;
 1157  4 while (i < length && s.charAt(i) == ' ') {
 1158  0 i++;
 1159    }
 1160    // we'll increment down below, so back up to the last blank as the current char.
 1161  4 i--;
 1162    }
 1163    else {
 1164    // we must keep this line break. Append the appropriate style.
 1165  0 if (CRLF) {
 1166  0 newString.append("\r\n");
 1167    }
 1168    else {
 1169  0 newString.append(ch);
 1170    }
 1171    }
 1172    }
 1173    else {
 1174    // just a normal, ordinary character
 1175  410 newString.append(ch);
 1176    }
 1177    }
 1178  3 return newString.toString();
 1179    }
 1180    }
 1181   
 1182   
 1183    /**
 1184    * Utility class for examining content information written out
 1185    * by a DataHandler object. This stream gathers statistics on
 1186    * the stream so it can make transfer encoding determinations.
 1187    */
 1188    class ContentCheckingOutputStream extends OutputStream {
 1189    private int asciiChars = 0;
 1190    private int nonAsciiChars = 0;
 1191    private boolean containsLongLines = false;
 1192    private boolean containsMalformedEOL = false;
 1193    private int previousChar = 0;
 1194    private int span = 0;
 1195   
 1196  0 ContentCheckingOutputStream() {
 1197    }
 1198   
 1199  0 public void write(byte[] data) throws IOException {
 1200  0 write(data, 0, data.length);
 1201    }
 1202   
 1203  0 public void write(byte[] data, int offset, int length) throws IOException {
 1204  0 for (int i = 0; i < length; i++) {
 1205  0 write(data[offset + i]);
 1206    }
 1207    }
 1208   
 1209  0 public void write(int ch) {
 1210    // we found a linebreak. Reset the line length counters on either one. We don't
 1211    // really need to validate here.
 1212  0 if (ch == '\n' || ch == '\r') {
 1213    // we found a newline, this is only valid if the previous char was the '\r'
 1214  0 if (ch == '\n') {
 1215    // malformed linebreak? force this to base64 encoding.
 1216  0 if (previousChar != '\r') {
 1217  0 containsMalformedEOL = true;
 1218    }
 1219    }
 1220    // hit a line end, reset our line length counter
 1221  0 span = 0;
 1222    }
 1223    else {
 1224  0 span++;
 1225    // the text has long lines, we can't transfer this as unencoded text.
 1226  0 if (span > 998) {
 1227  0 containsLongLines = true;
 1228    }
 1229   
 1230    // non-ascii character, we have to transfer this in binary.
 1231  0 if (!ASCIIUtil.isAscii(ch)) {
 1232  0 nonAsciiChars++;
 1233    }
 1234    else {
 1235  0 asciiChars++;
 1236    }
 1237    }
 1238  0 previousChar = ch;
 1239    }
 1240   
 1241   
 1242  0 public String getBinaryTransferEncoding() {
 1243  0 if (nonAsciiChars != 0 || containsLongLines || containsMalformedEOL) {
 1244  0 return "base64";
 1245    }
 1246    else {
 1247  0 return "7bit";
 1248    }
 1249    }
 1250   
 1251  0 public String getTextTransferEncoding() {
 1252    // looking good so far, only valid chars here.
 1253  0 if (nonAsciiChars == 0) {
 1254    // does this contain long text lines? We need to use a Q-P encoding which will
 1255    // be only slightly longer, but handles folding the longer lines.
 1256  0 if (containsLongLines) {
 1257  0 return "quoted-printable";
 1258    }
 1259    else {
 1260    // ideal! Easiest one to handle.
 1261  0 return "7bit";
 1262    }
 1263    }
 1264    else {
 1265    // mostly characters requiring encoding? Base64 is our best bet.
 1266  0 if (nonAsciiChars > asciiChars) {
 1267  0 return "base64";
 1268    }
 1269    else {
 1270    // Q-P encoding will use fewer bytes than the full Base64.
 1271  0 return "quoted-printable";
 1272    }
 1273    }
 1274    }
 1275    }