| 
  |||||||||||||||||||
| Source file | Conditionals | Statements | Methods | TOTAL | |||||||||||||||
| ASCIIUtil.java | 32% | 38.8% | 100% | 37.8% | 
             | 
  ||||||||||||||
| 1 | /** | |
| 2 | * | |
| 3 | * Copyright 2003-2004 The Apache Software Foundation | |
| 4 | * | |
| 5 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
| 6 | * you may not use this file except in compliance with the License. | |
| 7 | * You may obtain a copy of the License at | |
| 8 | * | |
| 9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
| 10 | * | |
| 11 | * Unless required by applicable law or agreed to in writing, software | |
| 12 | * distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 | * See the License for the specific language governing permissions and | |
| 15 | * limitations under the License. | |
| 16 | */ | |
| 17 | ||
| 18 | package org.apache.geronimo.mail.util; | |
| 19 | ||
| 20 | import java.io.BufferedInputStream; | |
| 21 | import java.io.InputStream; | |
| 22 | import java.io.IOException; | |
| 23 | ||
| 24 | ||
| 25 | /** | |
| 26 | * Set of utility classes for handling common encoding-related | |
| 27 | * manipulations. | |
| 28 | */ | |
| 29 | public class ASCIIUtil { | |
| 30 | private static final String MIME_FOLDTEXT = "mail.mime.foldtext"; | |
| 31 | private static final int FOLD_THRESHOLD = 76; | |
| 32 | ||
| 33 | /** | |
| 34 | * Test to see if this string contains only US-ASCII (i.e., 7-bit | |
| 35 | * ASCII) charactes. | |
| 36 | * | |
| 37 | * @param s The test string. | |
| 38 | * | |
| 39 | * @return true if this is a valid 7-bit ASCII encoding, false if it | |
| 40 | * contains any non-US ASCII characters. | |
| 41 | */ | |
| 42 | 1 | static public boolean isAscii(String s) { | 
| 43 | 1 | for (int i = 0; i < s.length(); i++) { | 
| 44 | 3 | if (!isAscii(s.charAt(i))) { | 
| 45 | 0 | return false; | 
| 46 | } | |
| 47 | } | |
| 48 | 1 | return true; | 
| 49 | } | |
| 50 | ||
| 51 | /** | |
| 52 | * Test to see if a given character can be considered "valid" ASCII. | |
| 53 | * The excluded characters are the control characters less than | |
| 54 | * 32, 8-bit characters greater than 127, EXCEPT the CR, LF and | |
| 55 | * tab characters ARE considered value (all less than 32). | |
| 56 | * | |
| 57 | * @param ch The test character. | |
| 58 | * | |
| 59 | * @return true if this character meets the "ascii-ness" criteria, false | |
| 60 | * otherwise. | |
| 61 | */ | |
| 62 | 614 | static public boolean isAscii(int ch) { | 
| 63 | // these are explicitly considered valid. | |
| 64 | 614 | if (ch == '\r' || ch == '\n' || ch == '\t') { | 
| 65 | 2 | return true; | 
| 66 | } | |
| 67 | ||
| 68 | // anything else outside the range is just plain wrong. | |
| 69 | 612 | if (ch >= 127 || ch < 32) { | 
| 70 | 30 | return false; | 
| 71 | } | |
| 72 | 582 | return true; | 
| 73 | } | |
| 74 | ||
| 75 | ||
| 76 | /** | |
| 77 | * Examine a stream of text and make a judgement on what encoding | |
| 78 | * type should be used for the text. Ideally, we want to use 7bit | |
| 79 | * encoding to determine this, but we may need to use either quoted-printable | |
| 80 | * or base64. The choice is made on the ratio of 7-bit characters to non-7bit. | |
| 81 | * | |
| 82 | * @param content An input stream for the content we're examining. | |
| 83 | * | |
| 84 | * @exception IOException | |
| 85 | */ | |
| 86 | 3 | public static String getTextTransferEncoding(InputStream content) throws IOException { | 
| 87 | ||
| 88 | // for efficiency, we'll read in blocks. | |
| 89 | 3 | BufferedInputStream in = new BufferedInputStream(content, 4096); | 
| 90 | ||
| 91 | 3 | int span = 0; // span of characters without a line break. | 
| 92 | 3 | boolean containsLongLines = false; | 
| 93 | 3 | int asciiChars = 0; | 
| 94 | 3 | int nonAsciiChars = 0; | 
| 95 | ||
| 96 | 3 | while (true) { | 
| 97 | 33 | int ch = in.read(); | 
| 98 | // if we hit an EOF here, go decide what type we've actually found. | |
| 99 | 33 | if (ch == -1) { | 
| 100 | 3 | break; | 
| 101 | } | |
| 102 | ||
| 103 | // we found a linebreak. Reset the line length counters on either one. We don't | |
| 104 | // really need to validate here. | |
| 105 | 30 | if (ch == '\n' || ch == '\r') { | 
| 106 | // hit a line end, reset our line length counter | |
| 107 | 0 | span = 0; | 
| 108 | } | |
| 109 | else { | |
| 110 | 30 | span++; | 
| 111 | // the text has long lines, we can't transfer this as unencoded text. | |
| 112 | 30 | if (span > 998) { | 
| 113 | 0 | containsLongLines = true; | 
| 114 | } | |
| 115 | ||
| 116 | // non-ascii character, we have to transfer this in binary. | |
| 117 | 30 | if (!isAscii(ch)) { | 
| 118 | 0 | nonAsciiChars++; | 
| 119 | } | |
| 120 | else { | |
| 121 | 30 | asciiChars++; | 
| 122 | } | |
| 123 | } | |
| 124 | } | |
| 125 | ||
| 126 | // looking good so far, only valid chars here. | |
| 127 | 3 | if (nonAsciiChars == 0) { | 
| 128 | // does this contain long text lines? We need to use a Q-P encoding which will | |
| 129 | // be only slightly longer, but handles folding the longer lines. | |
| 130 | 3 | if (containsLongLines) { | 
| 131 | 0 | return "quoted-printable"; | 
| 132 | } | |
| 133 | else { | |
| 134 | // ideal! Easiest one to handle. | |
| 135 | 3 | return "7bit"; | 
| 136 | } | |
| 137 | } | |
| 138 | else { | |
| 139 | // mostly characters requiring encoding? Base64 is our best bet. | |
| 140 | 0 | if (nonAsciiChars > asciiChars) { | 
| 141 | 0 | return "base64"; | 
| 142 | } | |
| 143 | else { | |
| 144 | // Q-P encoding will use fewer bytes than the full Base64. | |
| 145 | 0 | return "quoted-printable"; | 
| 146 | } | |
| 147 | } | |
| 148 | } | |
| 149 | ||
| 150 | ||
| 151 | /** | |
| 152 | * Examine a stream of text and make a judgement on what encoding | |
| 153 | * type should be used for the text. Ideally, we want to use 7bit | |
| 154 | * encoding to determine this, but we may need to use either quoted-printable | |
| 155 | * or base64. The choice is made on the ratio of 7-bit characters to non-7bit. | |
| 156 | * | |
| 157 | * @param content A string for the content we're examining. | |
| 158 | */ | |
| 159 | 80 | public static String getTextTransferEncoding(String content) { | 
| 160 | ||
| 161 | 80 | int asciiChars = 0; | 
| 162 | 80 | int nonAsciiChars = 0; | 
| 163 | ||
| 164 | 80 | for (int i = 0; i < content.length(); i++) { | 
| 165 | 580 | int ch = content.charAt(i); | 
| 166 | ||
| 167 | // non-ascii character, we have to transfer this in binary. | |
| 168 | 580 | if (!isAscii(ch)) { | 
| 169 | 29 | nonAsciiChars++; | 
| 170 | } | |
| 171 | else { | |
| 172 | 551 | asciiChars++; | 
| 173 | } | |
| 174 | } | |
| 175 | ||
| 176 | // looking good so far, only valid chars here. | |
| 177 | 80 | if (nonAsciiChars == 0) { | 
| 178 | // ideal! Easiest one to handle. | |
| 179 | 69 | return "7bit"; | 
| 180 | } | |
| 181 | else { | |
| 182 | // mostly characters requiring encoding? Base64 is our best bet. | |
| 183 | 11 | if (nonAsciiChars > asciiChars) { | 
| 184 | 0 | return "base64"; | 
| 185 | } | |
| 186 | else { | |
| 187 | // Q-P encoding will use fewer bytes than the full Base64. | |
| 188 | 11 | return "quoted-printable"; | 
| 189 | } | |
| 190 | } | |
| 191 | } | |
| 192 | ||
| 193 | ||
| 194 | /** | |
| 195 | * Determine if the transfer encoding looks like it might be | |
| 196 | * valid ascii text, and thus transferable as 7bit code. In | |
| 197 | * order for this to be true, all characters must be valid | |
| 198 | * 7-bit ASCII code AND all line breaks must be properly formed | |
| 199 | * (JUST '\r\n' sequences). 7-bit transfers also | |
| 200 | * typically have a line limit of 1000 bytes (998 + the CRLF), so any | |
| 201 | * stretch of charactes longer than that will also force Base64 encoding. | |
| 202 | * | |
| 203 | * @param content An input stream for the content we're examining. | |
| 204 | * | |
| 205 | * @exception IOException | |
| 206 | */ | |
| 207 | 1 | public static String getBinaryTransferEncoding(InputStream content) throws IOException { | 
| 208 | ||
| 209 | // for efficiency, we'll read in blocks. | |
| 210 | 1 | BufferedInputStream in = new BufferedInputStream(content, 4096); | 
| 211 | ||
| 212 | 1 | int previousChar = 0; | 
| 213 | 1 | int span = 0; // span of characters without a line break. | 
| 214 | ||
| 215 | 1 | while (true) { | 
| 216 | 1 | int ch = in.read(); | 
| 217 | // if we hit an EOF here, we've only found valid text so far, so we can transfer this as | |
| 218 | // 7-bit ascii. | |
| 219 | 1 | if (ch == -1) { | 
| 220 | 0 | return "7bit"; | 
| 221 | } | |
| 222 | ||
| 223 | // we found a newline, this is only valid if the previous char was the '\r' | |
| 224 | 1 | if (ch == '\n') { | 
| 225 | // malformed linebreak? force this to base64 encoding. | |
| 226 | 0 | if (previousChar != '\r') { | 
| 227 | 0 | return "base64"; | 
| 228 | } | |
| 229 | // hit a line end, reset our line length counter | |
| 230 | 0 | span = 0; | 
| 231 | } | |
| 232 | else { | |
| 233 | 1 | span++; | 
| 234 | // the text has long lines, we can't transfer this as unencoded text. | |
| 235 | 1 | if (span > 998) { | 
| 236 | 0 | return "base64"; | 
| 237 | } | |
| 238 | ||
| 239 | // non-ascii character, we have to transfer this in binary. | |
| 240 | 1 | if (!isAscii(ch)) { | 
| 241 | 1 | return "base64"; | 
| 242 | } | |
| 243 | } | |
| 244 | 0 | previousChar = ch; | 
| 245 | } | |
| 246 | } | |
| 247 | ||
| 248 | ||
| 249 | /** | |
| 250 | * Perform RFC 2047 text folding on a string of text. | |
| 251 | * | |
| 252 | * @param used The amount of text already "used up" on this line. This is | |
| 253 | * typically the length of a message header that this text | |
| 254 | * get getting added to. | |
| 255 | * @param s The text to fold. | |
| 256 | * | |
| 257 | * @return The input text, with linebreaks inserted at appropriate fold points. | |
| 258 | */ | |
| 259 | 8 | public static String fold(int used, String s) { | 
| 260 | // if folding is disable, unfolding is also. Return the string unchanged. | |
| 261 | 8 | if (!SessionUtil.getBooleanProperty(MIME_FOLDTEXT, true)) { | 
| 262 | 0 | return s; | 
| 263 | } | |
| 264 | ||
| 265 | 8 | int end; | 
| 266 | ||
| 267 | // now we need to strip off any trailing "whitespace", where whitespace is blanks, tabs, | |
| 268 | // and line break characters. | |
| 269 | 8 | for (end = s.length() - 1; end >= 0; end--) { | 
| 270 | 8 | int ch = s.charAt(end); | 
| 271 | 8 | if (ch != ' ' && ch != '\t' ) { | 
| 272 | 8 | break; | 
| 273 | } | |
| 274 | } | |
| 275 | ||
| 276 | // did we actually find something to remove? Shorten the String to the trimmed length | |
| 277 | 8 | if (end != s.length() - 1) { | 
| 278 | 0 | s = s.substring(0, end + 1); | 
| 279 | } | |
| 280 | ||
| 281 | // does the string as it exists now not require folding? We can just had that back right off. | |
| 282 | 8 | if (s.length() + used <= FOLD_THRESHOLD) { | 
| 283 | 8 | return s; | 
| 284 | } | |
| 285 | ||
| 286 | // get a buffer for the length of the string, plus room for a few line breaks. | |
| 287 | // these are soft line breaks, so we generally need more that just the line breaks (an escape + | |
| 288 | // CR + LF + leading space on next line); | |
| 289 | 0 | StringBuffer newString = new StringBuffer(s.length() + 8); | 
| 290 | ||
| 291 | ||
| 292 | // now keep chopping this down until we've accomplished what we need. | |
| 293 | 0 | while (used + s.length() > FOLD_THRESHOLD) { | 
| 294 | 0 | int breakPoint = -1; | 
| 295 | 0 | char breakChar = 0; | 
| 296 | ||
| 297 | // now scan for the next place where we can break. | |
| 298 | 0 | for (int i = 0; i < s.length(); i++) { | 
| 299 | // have we passed the fold limit? | |
| 300 | 0 | if (used + i > FOLD_THRESHOLD) { | 
| 301 | // if we've already seen a blank, then stop now. Otherwise | |
| 302 | // we keep going until we hit a fold point. | |
| 303 | 0 | if (breakPoint != -1) { | 
| 304 | 0 | break; | 
| 305 | } | |
| 306 | } | |
| 307 | 0 | char ch = s.charAt(i); | 
| 308 | ||
| 309 | // a white space character? | |
| 310 | 0 | if (ch == ' ' || ch == '\t') { | 
| 311 | // this might be a run of white space, so skip over those now. | |
| 312 | 0 | breakPoint = i; | 
| 313 | // we need to maintain the same character type after the inserted linebreak. | |
| 314 | 0 | breakChar = ch; | 
| 315 | 0 | i++; | 
| 316 | 0 | while (i < s.length()) { | 
| 317 | 0 | ch = s.charAt(i); | 
| 318 | 0 | if (ch != ' ' && ch != '\t') { | 
| 319 | 0 | break; | 
| 320 | } | |
| 321 | 0 | i++; | 
| 322 | } | |
| 323 | } | |
| 324 | // found an embedded new line. Escape this so that the unfolding process preserves it. | |
| 325 | 0 | else if (ch == '\n') { | 
| 326 | 0 | newString.append('\\'); | 
| 327 | 0 | newString.append('\n'); | 
| 328 | } | |
| 329 | 0 | else if (ch == '\r') { | 
| 330 | 0 | newString.append('\\'); | 
| 331 | 0 | newString.append('\n'); | 
| 332 | 0 | i++; | 
| 333 | // if this is a CRLF pair, add the second char also | |
| 334 | 0 | if (i < s.length() && s.charAt(i) == '\n') { | 
| 335 | 0 | newString.append('\r'); | 
| 336 | } | |
| 337 | } | |
| 338 | ||
| 339 | } | |
| 340 | // no fold point found, we punt, append the remainder and leave. | |
| 341 | 0 | if (breakPoint == -1) { | 
| 342 | 0 | newString.append(s); | 
| 343 | 0 | return newString.toString(); | 
| 344 | } | |
| 345 | 0 | newString.append(s.substring(0, breakPoint)); | 
| 346 | 0 | newString.append("\r\n"); | 
| 347 | 0 | newString.append(breakChar); | 
| 348 | // chop the string | |
| 349 | 0 | s = s.substring(breakPoint + 1); | 
| 350 | // start again, and we've used the first char of the limit already with the whitespace char. | |
| 351 | 0 | used = 1; | 
| 352 | } | |
| 353 | ||
| 354 | // add on the remainder, and return | |
| 355 | 0 | newString.append(s); | 
| 356 | 0 | return newString.toString(); | 
| 357 | } | |
| 358 | ||
| 359 | /** | |
| 360 | * Unfold a folded string. The unfolding process will remove | |
| 361 | * any line breaks that are not escaped and which are also followed | |
| 362 | * by whitespace characters. | |
| 363 | * | |
| 364 | * @param s The folded string. | |
| 365 | * | |
| 366 | * @return A new string with unfolding rules applied. | |
| 367 | */ | |
| 368 | 6 | public static String unfold(String s) { | 
| 369 | // if folding is disable, unfolding is also. Return the string unchanged. | |
| 370 | 6 | if (!SessionUtil.getBooleanProperty(MIME_FOLDTEXT, true)) { | 
| 371 | 0 | return s; | 
| 372 | } | |
| 373 | ||
| 374 | // if there are no line break characters in the string, we can just return this. | |
| 375 | 6 | if (s.indexOf('\n') < 0 && s.indexOf('\r') < 0) { | 
| 376 | 6 | return s; | 
| 377 | } | |
| 378 | ||
| 379 | // we need to scan and fix things up. | |
| 380 | 0 | int length = s.length(); | 
| 381 | ||
| 382 | 0 | StringBuffer newString = new StringBuffer(length); | 
| 383 | ||
| 384 | // scan the entire string | |
| 385 | 0 | for (int i = 0; i < length; i++) { | 
| 386 | 0 | int ch = s.charAt(i); | 
| 387 | ||
| 388 | // we have a backslash. In folded strings, escape characters are only processed as such if | |
| 389 | // they preceed line breaks. Otherwise, we leave it be. | |
| 390 | 0 | if (ch == '\\') { | 
| 391 | // escape at the very end? Just add the character. | |
| 392 | 0 | if (i == length - 1) { | 
| 393 | 0 | newString.append(ch); | 
| 394 | } | |
| 395 | else { | |
| 396 | 0 | int nextChar = s.charAt(i + 1); | 
| 397 | ||
| 398 | // naked newline? Add the new line to the buffer, and skip the escape char. | |
| 399 | 0 | if (nextChar == '\n') { | 
| 400 | 0 | newString.append('\n'); | 
| 401 | 0 | i++; | 
| 402 | } | |
| 403 | 0 | else if (nextChar == '\r') { | 
| 404 | // just the CR left? Add it, removing the escape. | |
| 405 | 0 | if (i == length - 2 || s.charAt(i + 2) != '\r') { | 
| 406 | 0 | newString.append('\r'); | 
| 407 | 0 | i++; | 
| 408 | } | |
| 409 | else { | |
| 410 | // toss the escape, add both parts of the CRLF, and skip over two chars. | |
| 411 | 0 | newString.append('\r'); | 
| 412 | 0 | newString.append('\n'); | 
| 413 | 0 | i += 2; | 
| 414 | } | |
| 415 | } | |
| 416 | else { | |
| 417 | // an escape for another purpose, just copy it over. | |
| 418 | 0 | newString.append(ch); | 
| 419 | } | |
| 420 | } | |
| 421 | } | |
| 422 | // we have an unescaped line break | |
| 423 | 0 | else if (ch == '\n' || ch == '\r') { | 
| 424 | // remember the position in case we need to backtrack. | |
| 425 | 0 | int lineBreak = i; | 
| 426 | 0 | boolean CRLF = false; | 
| 427 | ||
| 428 | 0 | if (ch == '\r') { | 
| 429 | // check to see if we need to step over this. | |
| 430 | 0 | if (i < length - 1 && s.charAt(i + 1) == '\n') { | 
| 431 | 0 | i++; | 
| 432 | // flag the type so we know what we might need to preserve. | |
| 433 | 0 | CRLF = true; | 
| 434 | } | |
| 435 | } | |
| 436 | ||
| 437 | // get a temp position scanner. | |
| 438 | 0 | int scan = i + 1; | 
| 439 | ||
| 440 | // does a blank follow this new line? we need to scrap the new line and reduce the leading blanks | |
| 441 | // down to a single blank. | |
| 442 | 0 | if (scan < length && s.charAt(scan) == ' ') { | 
| 443 | // add the character | |
| 444 | 0 | newString.append(' '); | 
| 445 | ||
| 446 | // scan over the rest of the blanks | |
| 447 | 0 | i = scan + 1; | 
| 448 | 0 | while (i < length && s.charAt(i) == ' ') { | 
| 449 | 0 | i++; | 
| 450 | } | |
| 451 | // we'll increment down below, so back up to the last blank as the current char. | |
| 452 | 0 | i--; | 
| 453 | } | |
| 454 | else { | |
| 455 | // we must keep this line break. Append the appropriate style. | |
| 456 | 0 | if (CRLF) { | 
| 457 | 0 | newString.append("\r\n"); | 
| 458 | } | |
| 459 | else { | |
| 460 | 0 | newString.append(ch); | 
| 461 | } | |
| 462 | } | |
| 463 | } | |
| 464 | else { | |
| 465 | // just a normal, ordinary character | |
| 466 | 0 | newString.append(ch); | 
| 467 | } | |
| 468 | } | |
| 469 | 0 | return newString.toString(); | 
| 470 | } | |
| 471 | } | 
  | 
||||||||||