Clover coverage report - Maven Clover report
Coverage timestamp: Sun Aug 20 2006 04:01:04 PDT
file stats: LOC: 471   Methods: 7
NCLOC: 256   Classes: 1
 
 Source file Conditionals Statements Methods TOTAL
ASCIIUtil.java 32% 38.8% 100% 37.8%
coverage coverage
 1    /**
 2    *
 3    * Copyright 2003-2004 The Apache Software Foundation
 4    *
 5    * Licensed under the Apache License, Version 2.0 (the "License");
 6    * you may not use this file except in compliance with the License.
 7    * You may obtain a copy of the License at
 8    *
 9    * http://www.apache.org/licenses/LICENSE-2.0
 10    *
 11    * Unless required by applicable law or agreed to in writing, software
 12    * distributed under the License is distributed on an "AS IS" BASIS,
 13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14    * See the License for the specific language governing permissions and
 15    * limitations under the License.
 16    */
 17   
 18    package org.apache.geronimo.mail.util;
 19   
 20    import java.io.BufferedInputStream;
 21    import java.io.InputStream;
 22    import java.io.IOException;
 23   
 24   
 25    /**
 26    * Set of utility classes for handling common encoding-related
 27    * manipulations.
 28    */
 29    public class ASCIIUtil {
 30    private static final String MIME_FOLDTEXT = "mail.mime.foldtext";
 31    private static final int FOLD_THRESHOLD = 76;
 32   
 33    /**
 34    * Test to see if this string contains only US-ASCII (i.e., 7-bit
 35    * ASCII) charactes.
 36    *
 37    * @param s The test string.
 38    *
 39    * @return true if this is a valid 7-bit ASCII encoding, false if it
 40    * contains any non-US ASCII characters.
 41    */
 42  1 static public boolean isAscii(String s) {
 43  1 for (int i = 0; i < s.length(); i++) {
 44  3 if (!isAscii(s.charAt(i))) {
 45  0 return false;
 46    }
 47    }
 48  1 return true;
 49    }
 50   
 51    /**
 52    * Test to see if a given character can be considered "valid" ASCII.
 53    * The excluded characters are the control characters less than
 54    * 32, 8-bit characters greater than 127, EXCEPT the CR, LF and
 55    * tab characters ARE considered value (all less than 32).
 56    *
 57    * @param ch The test character.
 58    *
 59    * @return true if this character meets the "ascii-ness" criteria, false
 60    * otherwise.
 61    */
 62  614 static public boolean isAscii(int ch) {
 63    // these are explicitly considered valid.
 64  614 if (ch == '\r' || ch == '\n' || ch == '\t') {
 65  2 return true;
 66    }
 67   
 68    // anything else outside the range is just plain wrong.
 69  612 if (ch >= 127 || ch < 32) {
 70  30 return false;
 71    }
 72  582 return true;
 73    }
 74   
 75   
 76    /**
 77    * Examine a stream of text and make a judgement on what encoding
 78    * type should be used for the text. Ideally, we want to use 7bit
 79    * encoding to determine this, but we may need to use either quoted-printable
 80    * or base64. The choice is made on the ratio of 7-bit characters to non-7bit.
 81    *
 82    * @param content An input stream for the content we're examining.
 83    *
 84    * @exception IOException
 85    */
 86  3 public static String getTextTransferEncoding(InputStream content) throws IOException {
 87   
 88    // for efficiency, we'll read in blocks.
 89  3 BufferedInputStream in = new BufferedInputStream(content, 4096);
 90   
 91  3 int span = 0; // span of characters without a line break.
 92  3 boolean containsLongLines = false;
 93  3 int asciiChars = 0;
 94  3 int nonAsciiChars = 0;
 95   
 96  3 while (true) {
 97  33 int ch = in.read();
 98    // if we hit an EOF here, go decide what type we've actually found.
 99  33 if (ch == -1) {
 100  3 break;
 101    }
 102   
 103    // we found a linebreak. Reset the line length counters on either one. We don't
 104    // really need to validate here.
 105  30 if (ch == '\n' || ch == '\r') {
 106    // hit a line end, reset our line length counter
 107  0 span = 0;
 108    }
 109    else {
 110  30 span++;
 111    // the text has long lines, we can't transfer this as unencoded text.
 112  30 if (span > 998) {
 113  0 containsLongLines = true;
 114    }
 115   
 116    // non-ascii character, we have to transfer this in binary.
 117  30 if (!isAscii(ch)) {
 118  0 nonAsciiChars++;
 119    }
 120    else {
 121  30 asciiChars++;
 122    }
 123    }
 124    }
 125   
 126    // looking good so far, only valid chars here.
 127  3 if (nonAsciiChars == 0) {
 128    // does this contain long text lines? We need to use a Q-P encoding which will
 129    // be only slightly longer, but handles folding the longer lines.
 130  3 if (containsLongLines) {
 131  0 return "quoted-printable";
 132    }
 133    else {
 134    // ideal! Easiest one to handle.
 135  3 return "7bit";
 136    }
 137    }
 138    else {
 139    // mostly characters requiring encoding? Base64 is our best bet.
 140  0 if (nonAsciiChars > asciiChars) {
 141  0 return "base64";
 142    }
 143    else {
 144    // Q-P encoding will use fewer bytes than the full Base64.
 145  0 return "quoted-printable";
 146    }
 147    }
 148    }
 149   
 150   
 151    /**
 152    * Examine a stream of text and make a judgement on what encoding
 153    * type should be used for the text. Ideally, we want to use 7bit
 154    * encoding to determine this, but we may need to use either quoted-printable
 155    * or base64. The choice is made on the ratio of 7-bit characters to non-7bit.
 156    *
 157    * @param content A string for the content we're examining.
 158    */
 159  80 public static String getTextTransferEncoding(String content) {
 160   
 161  80 int asciiChars = 0;
 162  80 int nonAsciiChars = 0;
 163   
 164  80 for (int i = 0; i < content.length(); i++) {
 165  580 int ch = content.charAt(i);
 166   
 167    // non-ascii character, we have to transfer this in binary.
 168  580 if (!isAscii(ch)) {
 169  29 nonAsciiChars++;
 170    }
 171    else {
 172  551 asciiChars++;
 173    }
 174    }
 175   
 176    // looking good so far, only valid chars here.
 177  80 if (nonAsciiChars == 0) {
 178    // ideal! Easiest one to handle.
 179  69 return "7bit";
 180    }
 181    else {
 182    // mostly characters requiring encoding? Base64 is our best bet.
 183  11 if (nonAsciiChars > asciiChars) {
 184  0 return "base64";
 185    }
 186    else {
 187    // Q-P encoding will use fewer bytes than the full Base64.
 188  11 return "quoted-printable";
 189    }
 190    }
 191    }
 192   
 193   
 194    /**
 195    * Determine if the transfer encoding looks like it might be
 196    * valid ascii text, and thus transferable as 7bit code. In
 197    * order for this to be true, all characters must be valid
 198    * 7-bit ASCII code AND all line breaks must be properly formed
 199    * (JUST '\r\n' sequences). 7-bit transfers also
 200    * typically have a line limit of 1000 bytes (998 + the CRLF), so any
 201    * stretch of charactes longer than that will also force Base64 encoding.
 202    *
 203    * @param content An input stream for the content we're examining.
 204    *
 205    * @exception IOException
 206    */
 207  1 public static String getBinaryTransferEncoding(InputStream content) throws IOException {
 208   
 209    // for efficiency, we'll read in blocks.
 210  1 BufferedInputStream in = new BufferedInputStream(content, 4096);
 211   
 212  1 int previousChar = 0;
 213  1 int span = 0; // span of characters without a line break.
 214   
 215  1 while (true) {
 216  1 int ch = in.read();
 217    // if we hit an EOF here, we've only found valid text so far, so we can transfer this as
 218    // 7-bit ascii.
 219  1 if (ch == -1) {
 220  0 return "7bit";
 221    }
 222   
 223    // we found a newline, this is only valid if the previous char was the '\r'
 224  1 if (ch == '\n') {
 225    // malformed linebreak? force this to base64 encoding.
 226  0 if (previousChar != '\r') {
 227  0 return "base64";
 228    }
 229    // hit a line end, reset our line length counter
 230  0 span = 0;
 231    }
 232    else {
 233  1 span++;
 234    // the text has long lines, we can't transfer this as unencoded text.
 235  1 if (span > 998) {
 236  0 return "base64";
 237    }
 238   
 239    // non-ascii character, we have to transfer this in binary.
 240  1 if (!isAscii(ch)) {
 241  1 return "base64";
 242    }
 243    }
 244  0 previousChar = ch;
 245    }
 246    }
 247   
 248   
 249    /**
 250    * Perform RFC 2047 text folding on a string of text.
 251    *
 252    * @param used The amount of text already "used up" on this line. This is
 253    * typically the length of a message header that this text
 254    * get getting added to.
 255    * @param s The text to fold.
 256    *
 257    * @return The input text, with linebreaks inserted at appropriate fold points.
 258    */
 259  8 public static String fold(int used, String s) {
 260    // if folding is disable, unfolding is also. Return the string unchanged.
 261  8 if (!SessionUtil.getBooleanProperty(MIME_FOLDTEXT, true)) {
 262  0 return s;
 263    }
 264   
 265  8 int end;
 266   
 267    // now we need to strip off any trailing "whitespace", where whitespace is blanks, tabs,
 268    // and line break characters.
 269  8 for (end = s.length() - 1; end >= 0; end--) {
 270  8 int ch = s.charAt(end);
 271  8 if (ch != ' ' && ch != '\t' ) {
 272  8 break;
 273    }
 274    }
 275   
 276    // did we actually find something to remove? Shorten the String to the trimmed length
 277  8 if (end != s.length() - 1) {
 278  0 s = s.substring(0, end + 1);
 279    }
 280   
 281    // does the string as it exists now not require folding? We can just had that back right off.
 282  8 if (s.length() + used <= FOLD_THRESHOLD) {
 283  8 return s;
 284    }
 285   
 286    // get a buffer for the length of the string, plus room for a few line breaks.
 287    // these are soft line breaks, so we generally need more that just the line breaks (an escape +
 288    // CR + LF + leading space on next line);
 289  0 StringBuffer newString = new StringBuffer(s.length() + 8);
 290   
 291   
 292    // now keep chopping this down until we've accomplished what we need.
 293  0 while (used + s.length() > FOLD_THRESHOLD) {
 294  0 int breakPoint = -1;
 295  0 char breakChar = 0;
 296   
 297    // now scan for the next place where we can break.
 298  0 for (int i = 0; i < s.length(); i++) {
 299    // have we passed the fold limit?
 300  0 if (used + i > FOLD_THRESHOLD) {
 301    // if we've already seen a blank, then stop now. Otherwise
 302    // we keep going until we hit a fold point.
 303  0 if (breakPoint != -1) {
 304  0 break;
 305    }
 306    }
 307  0 char ch = s.charAt(i);
 308   
 309    // a white space character?
 310  0 if (ch == ' ' || ch == '\t') {
 311    // this might be a run of white space, so skip over those now.
 312  0 breakPoint = i;
 313    // we need to maintain the same character type after the inserted linebreak.
 314  0 breakChar = ch;
 315  0 i++;
 316  0 while (i < s.length()) {
 317  0 ch = s.charAt(i);
 318  0 if (ch != ' ' && ch != '\t') {
 319  0 break;
 320    }
 321  0 i++;
 322    }
 323    }
 324    // found an embedded new line. Escape this so that the unfolding process preserves it.
 325  0 else if (ch == '\n') {
 326  0 newString.append('\\');
 327  0 newString.append('\n');
 328    }
 329  0 else if (ch == '\r') {
 330  0 newString.append('\\');
 331  0 newString.append('\n');
 332  0 i++;
 333    // if this is a CRLF pair, add the second char also
 334  0 if (i < s.length() && s.charAt(i) == '\n') {
 335  0 newString.append('\r');
 336    }
 337    }
 338   
 339    }
 340    // no fold point found, we punt, append the remainder and leave.
 341  0 if (breakPoint == -1) {
 342  0 newString.append(s);
 343  0 return newString.toString();
 344    }
 345  0 newString.append(s.substring(0, breakPoint));
 346  0 newString.append("\r\n");
 347  0 newString.append(breakChar);
 348    // chop the string
 349  0 s = s.substring(breakPoint + 1);
 350    // start again, and we've used the first char of the limit already with the whitespace char.
 351  0 used = 1;
 352    }
 353   
 354    // add on the remainder, and return
 355  0 newString.append(s);
 356  0 return newString.toString();
 357    }
 358   
 359    /**
 360    * Unfold a folded string. The unfolding process will remove
 361    * any line breaks that are not escaped and which are also followed
 362    * by whitespace characters.
 363    *
 364    * @param s The folded string.
 365    *
 366    * @return A new string with unfolding rules applied.
 367    */
 368  6 public static String unfold(String s) {
 369    // if folding is disable, unfolding is also. Return the string unchanged.
 370  6 if (!SessionUtil.getBooleanProperty(MIME_FOLDTEXT, true)) {
 371  0 return s;
 372    }
 373   
 374    // if there are no line break characters in the string, we can just return this.
 375  6 if (s.indexOf('\n') < 0 && s.indexOf('\r') < 0) {
 376  6 return s;
 377    }
 378   
 379    // we need to scan and fix things up.
 380  0 int length = s.length();
 381   
 382  0 StringBuffer newString = new StringBuffer(length);
 383   
 384    // scan the entire string
 385  0 for (int i = 0; i < length; i++) {
 386  0 int ch = s.charAt(i);
 387   
 388    // we have a backslash. In folded strings, escape characters are only processed as such if
 389    // they preceed line breaks. Otherwise, we leave it be.
 390  0 if (ch == '\\') {
 391    // escape at the very end? Just add the character.
 392  0 if (i == length - 1) {
 393  0 newString.append(ch);
 394    }
 395    else {
 396  0 int nextChar = s.charAt(i + 1);
 397   
 398    // naked newline? Add the new line to the buffer, and skip the escape char.
 399  0 if (nextChar == '\n') {
 400  0 newString.append('\n');
 401  0 i++;
 402    }
 403  0 else if (nextChar == '\r') {
 404    // just the CR left? Add it, removing the escape.
 405  0 if (i == length - 2 || s.charAt(i + 2) != '\r') {
 406  0 newString.append('\r');
 407  0 i++;
 408    }
 409    else {
 410    // toss the escape, add both parts of the CRLF, and skip over two chars.
 411  0 newString.append('\r');
 412  0 newString.append('\n');
 413  0 i += 2;
 414    }
 415    }
 416    else {
 417    // an escape for another purpose, just copy it over.
 418  0 newString.append(ch);
 419    }
 420    }
 421    }
 422    // we have an unescaped line break
 423  0 else if (ch == '\n' || ch == '\r') {
 424    // remember the position in case we need to backtrack.
 425  0 int lineBreak = i;
 426  0 boolean CRLF = false;
 427   
 428  0 if (ch == '\r') {
 429    // check to see if we need to step over this.
 430  0 if (i < length - 1 && s.charAt(i + 1) == '\n') {
 431  0 i++;
 432    // flag the type so we know what we might need to preserve.
 433  0 CRLF = true;
 434    }
 435    }
 436   
 437    // get a temp position scanner.
 438  0 int scan = i + 1;
 439   
 440    // does a blank follow this new line? we need to scrap the new line and reduce the leading blanks
 441    // down to a single blank.
 442  0 if (scan < length && s.charAt(scan) == ' ') {
 443    // add the character
 444  0 newString.append(' ');
 445   
 446    // scan over the rest of the blanks
 447  0 i = scan + 1;
 448  0 while (i < length && s.charAt(i) == ' ') {
 449  0 i++;
 450    }
 451    // we'll increment down below, so back up to the last blank as the current char.
 452  0 i--;
 453    }
 454    else {
 455    // we must keep this line break. Append the appropriate style.
 456  0 if (CRLF) {
 457  0 newString.append("\r\n");
 458    }
 459    else {
 460  0 newString.append(ch);
 461    }
 462    }
 463    }
 464    else {
 465    // just a normal, ordinary character
 466  0 newString.append(ch);
 467    }
 468    }
 469  0 return newString.toString();
 470    }
 471    }