/**
 *
 * Copyright 2003-2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.geronimo.mail.util;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.PushbackInputStream;
import java.io.UnsupportedEncodingException;

/**
 * Encoder/decoder for the quoted-printable transfer encoding, plus the
 * "Q" variant used for RFC 2047 encoded words.
 *
 * The encoding side is stateful: a blank or a carriage return is not written
 * immediately, but remembered until the next character is encoded, because
 * the correct output depends on what follows.  A blank or '\r' that is the
 * very last character handed to the encoder therefore stays pending; this
 * class has no operation that flushes it.
 */
public class QuotedPrintableEncoder implements Encoder {

    // hex digits used when writing an "=XX" escape.
    static protected final byte[] encodingTable =
    {
        (byte)'0', (byte)'1', (byte)'2', (byte)'3', (byte)'4', (byte)'5', (byte)'6', (byte)'7',
        (byte)'8', (byte)'9', (byte)'A', (byte)'B', (byte)'C', (byte)'D', (byte)'E', (byte)'F'
    };

    /*
     * reverse mapping of encodingTable (upper case hex digits only).  The decode methods
     * in this class use hexValue() instead, because that also validates the digit; the
     * table is retained because it is visible to subclasses.
     */
    static protected final byte[] decodingTable = new byte[128];

    static {
        // initialize the decoding table
        for (int i = 0; i < encodingTable.length; i++)
        {
            decodingTable[encodingTable[i]] = (byte)i;
        }
    }


    // default number of characters we will write per line.
    static private final int DEFAULT_CHARS_PER_LINE = 76;

    // the output stream we're wrapped around
    protected OutputStream out;
    // the number of bytes written;
    protected int bytesWritten = 0;
    // number of bytes written on the current line
    protected int lineCount = 0;
    // line length we're dealing with
    protected int lineLength;
    // number of deferred whitespace characters in decode mode.
    protected int deferredWhitespace = 0;
    // look-ahead character read while scanning blanks in decode mode (-1 if none).
    protected int cachedCharacter = -1;

    // indicates whether the last character was a '\r', potentially part of a CRLF sequence.
    protected boolean lastCR = false;
    // remember whether last character was a white space.
    protected boolean lastWhitespace = false;

    /**
     * Create an encoder with no output stream and the default line length.
     * A stream must be supplied later (the encode/decode methods taking an
     * OutputStream do this) before anything is written.
     */
    public QuotedPrintableEncoder() {
        this(null, DEFAULT_CHARS_PER_LINE);
    }

    /**
     * Create an encoder writing to the given stream with the default line length.
     *
     * @param out    The stream encoded data is written to.
     */
    public QuotedPrintableEncoder(OutputStream out) {
        this(out, DEFAULT_CHARS_PER_LINE);
    }

    /**
     * Create an encoder writing to the given stream.
     *
     * @param out        The stream encoded data is written to.
     * @param lineLength The maximum length of an encoded line.
     */
    public QuotedPrintableEncoder(OutputStream out, int lineLength) {
        this.out = out;
        this.lineLength = lineLength;
    }

    /**
     * Resolve a blank or carriage return that was deferred by the previous
     * call to encode(int), now that the following character is known.
     *
     * @param ch     The character that follows the deferred one.
     *
     * @exception IOException
     */
    private void checkDeferred(int ch) throws IOException {
        // was the last character we looked at a whitespace?  Try to decide what to do with it now.
        if (lastWhitespace) {
            // if this whitespace is at the end of the line, write it out encoded
            if (ch == '\r' || ch == '\n') {
                writeEncodedCharacter(' ');
            }
            else {
                // we can write this out without encoding.
                writeCharacter(' ');
            }
            // we always turn this off.
            lastWhitespace = false;
        }
        // deferred carriage return?
        else if (lastCR) {
            // if the char following the CR was not a new line, write an EOL now.
            if (ch != '\n') {
                writeEOL();
            }
            // we always turn this off too
            lastCR = false;
        }
    }


    /**
     * encode the input data producing a quoted-printable output stream.
     *
     * @param data   The array of byte data.
     * @param off    The starting offset within the data.
     * @param length Length of the data to encode.
     *
     * @return the value of the running bytes-written counter after encoding.
     */
    public int encode(byte[] data, int off, int length) throws IOException {
        int endOffset = off + length;

        while (off < endOffset) {
            // get the character
            byte ch = data[off++];

            // handle the encoding of this character.
            encode(ch);
        }

        return bytesWritten;
    }


    /**
     * Encode a single character to the current output stream.  Blanks and
     * carriage returns are only flagged here; they are written when the next
     * character arrives (see checkDeferred).
     *
     * @param ch     The character to encode.  Only the low-order 8 bits are used.
     *
     * @exception IOException
     */
    public void encode(int ch) throws IOException {
        // make sure this is just a single byte value.
        ch = ch & 0xFF;

        // see if we had to defer handling of a whitespace or '\r' character, and handle it if necessary.
        checkDeferred(ch);
        // different characters require special handling.
        switch (ch) {
            // spaces require special handling.  If the next character is a line terminator, then
            // the space needs to be encoded.
            case ' ':
            {
                // at this point, we don't know whether this needs encoding or not.  If the next
                // character is a linend, it gets encoded.  If anything else, we just write it as is.
                lastWhitespace = true;
                // turn off any CR flags.
                lastCR = false;
                break;
            }

            // carriage return, which may be part of a CRLF sequence.
            case '\r':
            {
                // just flag this until we see the next character.
                lastCR = true;
                break;
            }

            // a new line character...we need to check to see if it was paired up with a '\r' char.
            case '\n':
            {
                // we always write this out for a newline.  We defer CRs until we see if the LF follows.
                writeEOL();
                break;
            }

            // an '=' is the escape character for an encoded character, so it must also
            // be written encoded.
            case '=':
            {
                writeEncodedCharacter(ch);
                break;
            }

            // all other characters.  If outside the printable character range, write it encoded.
            default:
            {
                if (ch < 32 || ch >= 127) {
                    writeEncodedCharacter(ch);
                }
                else {
                    writeCharacter(ch);
                }
                break;
            }
        }
    }


    /**
     * encode the input data using the encoded-word ("Q") rules, writing to
     * the current output stream.
     *
     * @param data     The array of byte data.
     * @param off      The starting offset within the data.
     * @param length   Length of the data to encode.
     * @param specials Characters that must be written in encoded form.
     *
     * @return the value of the running bytes-written counter after encoding.
     */
    public int encode(byte[] data, int off, int length, String specials) throws IOException {
        int endOffset = off + length;

        while (off < endOffset) {
            // get the character
            byte ch = data[off++];

            // handle the encoding of this character.
            encode(ch, specials);
        }

        return bytesWritten;
    }


    /**
     * Encode one segment of an encoded word, reading from a stream and
     * appending the encoded text to a buffer.
     *
     * @param in       The source of the data to encode.
     * @param out      The buffer the encoded characters are appended to.
     * @param specials Characters that must be written in encoded form.
     * @param limit    The maximum number of encoded characters to produce.
     *
     * @return the number of encoded characters appended to the buffer.
     * @exception IOException
     */
    public int encode(PushbackInputStream in, StringBuffer out, String specials, int limit) throws IOException {
        int count = 0;

        while (count < limit) {
            int ch = in.read();

            if (ch == -1) {
                return count;
            }
            // make sure this is just a single byte value.
            ch = ch & 0xFF;

            if (ch == ' ') {
                // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
                out.append('_');
                count++;
            }
            // non-ascii chars and the designated specials all get encoded.
            else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
                // we need at least 3 characters to write this out, so we need to
                // forget we saw this one and try in the next segment.
                if (count + 3 > limit) {
                    in.unread(ch);
                    return count;
                }
                out.append('=');
                out.append((char)encodingTable[ch >> 4]);
                out.append((char)encodingTable[ch & 0x0F]);
                count += 3;
            }
            else {
                // good character, just use unchanged.
                out.append((char)ch);
                count++;
            }
        }
        return count;
    }


    /**
     * Specialized version of the encoder that handles encoding of
     * RFC 2047 encoded word values.  This has special handling for
     * certain characters, but less special handling for blanks and
     * linebreaks.
     *
     * @param ch       The character to encode.  Only the low-order 8 bits are used.
     * @param specials Characters that must be written in encoded form.
     *
     * @exception IOException
     */
    public void encode(int ch, String specials) throws IOException {
        // make sure this is just a single byte value.
        ch = ch & 0xFF;

        if (ch == ' ') {
            // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
            writeCharacter('_');
        }
        // non-ascii chars and the designated specials all get encoded.
        else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
            writeEncodedCharacter(ch);
        }
        else {
            // good character, just use unchanged.
            writeCharacter(ch);
        }
    }


    /**
     * encode the input data producing a quoted-printable output stream.
     *
     * @param data   The array of byte data.
     * @param off    The starting offset within the data.
     * @param length Length of the data to encode.
     * @param out    The output stream the encoded data is written to.
     *
     * @return the number of bytes produced.
     */
    public int encode(byte[] data, int off, int length, OutputStream out) throws IOException {
        // make sure we're writing to the correct stream
        this.out = out;
        bytesWritten = 0;

        // do the actual encoding
        return encode(data, off, length);
    }


    /**
     * decode the quoted-printable byte data writing it to the given output stream
     *
     * @param data   The array of byte data to decode.
     * @param off    Starting offset within the array.
     * @param length The length of data to decode.
     * @param out    The output stream used to return the decoded data.
     *
     * @return the number of bytes produced.
     * @exception IOException if the data contains a truncated or malformed "=XX" escape.
     */
    public int decode(byte[] data, int off, int length, OutputStream out) throws IOException {
        // make sure we're writing to the correct stream
        this.out = out;

        int endOffset = off + length;
        int bytesWritten = 0;

        while (off < endOffset) {
            byte ch = data[off++];

            // space characters are a pain.  We need to scan ahead until we find a non-space character.
            // if the character is a line terminator, we need to discard the blanks.
            if (ch == ' ') {
                int trailingSpaces = 1;
                // scan forward, counting the characters.
                while (off < endOffset && data[off] == ' ') {
                    // step forward and count this.
                    off++;
                    trailingSpaces++;
                }
                // is this a lineend at the current location?
                if (off >= endOffset || data[off] == '\r' || data[off] == '\n') {
                    // the blanks are not significant; go to the next one
                    continue;
                }
                else {
                    // make sure we account for the spaces in the output count.
                    bytesWritten += trailingSpaces;
                    // write out the blank characters we counted and continue with the non-blank.
                    while (trailingSpaces-- > 0) {
                        out.write(' ');
                    }
                }
            }
            else if (ch == '=') {
                // we found an encoded character.  Reduce the 3 char sequence to one.
                // but first, make sure we have two characters to work with.
                if (off + 1 >= endOffset) {
                    throw new IOException("Invalid quoted printable encoding");
                }
                byte b1 = data[off++];
                byte b2 = data[off++];

                // we've found an encoded carriage return.  The next char needs to be a newline
                if (b1 == '\r') {
                    if (b2 != '\n') {
                        throw new IOException("Invalid quoted printable encoding");
                    }
                    // this was a soft linebreak inserted by the encoding.  We just toss this away
                    // on decode.
                }
                else {
                    // this is a hex pair we need to convert back to a single byte.
                    out.write((hexValue(b1) << 4) | hexValue(b2));
                    // 3 bytes in, one byte out
                    bytesWritten++;
                }
            }
            else {
                // simple character, just write it out.
                out.write(ch);
                bytesWritten++;
            }
        }

        return bytesWritten;
    }

    /**
     * Decode a byte array of encoded-word data.
     *
     * @param data   The data array.
     * @param out    The output stream target for the decoded data.
     *
     * @return The number of bytes written to the stream.
     * @exception IOException
     */
    public int decodeWord(byte[] data, OutputStream out) throws IOException {
        return decodeWord(data, 0, data.length, out);
    }


    /**
     * decode encoded-word ("Q" encoded) byte data writing it to the given output stream
     *
     * @param data   The array of byte data to decode.
     * @param off    Starting offset within the array.
     * @param length The length of data to decode.
     * @param out    The output stream used to return the decoded data.
     *
     * @return the number of bytes produced.
     * @exception IOException if the data contains a truncated or malformed "=XX" escape.
     */
    public int decodeWord(byte[] data, int off, int length, OutputStream out) throws IOException {
        // make sure we're writing to the correct stream
        this.out = out;

        int endOffset = off + length;
        int bytesWritten = 0;

        while (off < endOffset) {
            byte ch = data[off++];

            // space characters were translated to '_' on encode, so we need to translate them back.
            if (ch == '_') {
                out.write(' ');
                // the blank is a produced byte like any other, so it belongs in the count.
                bytesWritten++;
            }
            else if (ch == '=') {
                // we found an encoded character.  Reduce the 3 char sequence to one.
                // but first, make sure we have two characters to work with.
                if (off + 1 >= endOffset) {
                    throw new IOException("Invalid quoted printable encoding");
                }
                byte b1 = data[off++];
                byte b2 = data[off++];

                // we've found an encoded carriage return.  The next char needs to be a newline
                if (b1 == '\r') {
                    if (b2 != '\n') {
                        throw new IOException("Invalid quoted printable encoding");
                    }
                    // this was a soft linebreak inserted by the encoding.  We just toss this away
                    // on decode.
                }
                else {
                    // this is a hex pair we need to convert back to a single byte.
                    out.write((hexValue(b1) << 4) | hexValue(b2));
                    // 3 bytes in, one byte out
                    bytesWritten++;
                }
            }
            else {
                // simple character, just write it out.
                out.write(ch);
                bytesWritten++;
            }
        }

        return bytesWritten;
    }


    /**
     * decode the quoted-printable String data writing it to the given output stream.
     *
     * @param data   The String data to decode.
     * @param out    The output stream to write the decoded data to.
     *
     * @return the number of bytes produced.
     * @exception IOException
     */
    public int decode(String data, OutputStream out) throws IOException {
        try {
            // just get the byte data and decode.
            byte[] bytes = data.getBytes("US-ASCII");
            return decode(bytes, 0, bytes.length, out);
        } catch (UnsupportedEncodingException e) {
            throw new IOException("Invalid UUEncoding");
        }
    }

    /**
     * Convert one hex digit of an "=XX" escape to its numeric value.  Upper
     * and lower case digits are both accepted.
     *
     * @param digit  The character to convert.
     *
     * @return the value of the digit, 0-15.
     * @exception IOException if the character is not a hex digit.
     */
    private static int hexValue(int digit) throws IOException {
        if (digit >= '0' && digit <= '9') {
            return digit - '0';
        }
        if (digit >= 'A' && digit <= 'F') {
            return digit - 'A' + 10;
        }
        if (digit >= 'a' && digit <= 'f') {
            return digit - 'a' + 10;
        }
        throw new IOException("Invalid quoted printable encoding");
    }

    /**
     * Insert a soft line break if the requested number of characters would
     * not fit on the current line.
     *
     * @param required The number of characters about to be written.
     *
     * @exception IOException
     */
    private void checkLineLength(int required) throws IOException {
        // the '=' of a soft line break is itself part of the line, so one position has to
        // stay free for it; break as soon as the new characters would reach the limit.
        if ((lineCount + required) >= lineLength) {
            out.write('=');
            out.write('\r');
            out.write('\n');
            bytesWritten += 3;
            lineCount = 0;
        }
    }


    /**
     * Write a character in its three character "=XX" encoded form.
     *
     * @param ch     The character value (0-255) to write.
     *
     * @exception IOException
     */
    public void writeEncodedCharacter(int ch) throws IOException {
        // we need 3 characters for an encoded value
        checkLineLength(3);
        out.write('=');
        out.write(encodingTable[ch >> 4]);
        out.write(encodingTable[ch & 0x0F]);
        lineCount += 3;
        bytesWritten += 3;
    }


    /**
     * Write a character unchanged.
     *
     * @param ch     The character to write.
     *
     * @exception IOException
     */
    public void writeCharacter(int ch) throws IOException {
        // a literal character takes a single position on the line
        checkLineLength(1);
        out.write(ch);
        lineCount++;
        bytesWritten++;
    }


    /**
     * Write a hard line break (CRLF) and start a new line.
     *
     * @exception IOException
     */
    public void writeEOL() throws IOException {
        out.write('\r');
        out.write('\n');
        lineCount = 0;
        // CR and LF: two bytes went out.
        bytesWritten += 2;
    }


    /**
     * Decode and return the next byte of quoted-printable data from a stream.
     * Look-ahead state is kept in this object between calls, so successive
     * calls are expected to read from the same stream.
     *
     * @param in     The source of the encoded data.
     *
     * @return the next decoded byte, or -1 at the end of the stream.
     * @exception IOException if an "=XX" escape is truncated or malformed.
     */
    public int decode(InputStream in) throws IOException {

        // we potentially need to scan over spans of whitespace characters to determine if they're real
        // we just return blanks until the count goes to zero.
        if (deferredWhitespace > 0) {
            deferredWhitespace--;
            return ' ';
        }

        // we may have needed to scan ahead to find the first non-blank character, which we would store
        // in cachedCharacter.  That character has not been interpreted yet, so it goes through the
        // normal decoding below rather than being handed back raw (it may be the '=' of an escape).
        int ch;
        if (cachedCharacter != -1) {
            ch = cachedCharacter;
            cachedCharacter = -1;
        }
        else {
            ch = in.read();
        }

        // reflect back an EOF condition.
        if (ch == -1) {
            return -1;
        }

        // space characters are a pain.  We need to scan ahead until we find a non-space character.
        // if the character is a line terminator, we need to discard the blanks.
        if (ch == ' ') {
            // scan forward, counting the characters.
            while ((ch = in.read()) == ' ') {
                deferredWhitespace++;
            }

            // is this a lineend at the current location?
            if (ch == -1 || ch == '\r' || ch == '\n') {
                // those blanks we so zealously counted up don't really exist.  Clear out the counter.
                deferredWhitespace = 0;
                // return the real significant character now.
                return ch;
            }
            else {
                // remember this character for later, after we've used up the deferred blanks.
                cachedCharacter = ch;
                // return this space.  We did not include this one in the deferred count, so we're right in sync.
                return ' ';
            }
        }
        else if (ch == '=') {
            int b1 = in.read();
            // we need to get two characters after the quotation marker
            if (b1 == -1) {
                throw new IOException("Truncated quoted printable data");
            }
            int b2 = in.read();
            // we need to get two characters after the quotation marker
            if (b2 == -1) {
                throw new IOException("Truncated quoted printable data");
            }

            // we've found an encoded carriage return.  The next char needs to be a newline
            if (b1 == '\r') {
                if (b2 != '\n') {
                    throw new IOException("Invalid quoted printable encoding");
                }
                // this was a soft linebreak inserted by the encoding.  We just toss this away
                // on decode.  We need to return something, so recurse and decode the next.
                return decode(in);
            }
            else {
                // this is a hex pair we need to convert back to a single byte.
                return (hexValue(b1) << 4) | hexValue(b2);
            }
        }
        else {
            return ch;
        }
    }


    /**
     * Perform RFC-2047 word encoding using Q-P data encoding.
     *
     * @param in       The source for the encoded data.
     * @param charset  The charset tag to be added to each encoded data section.
     * @param specials The set of special characters that we require to encoded.
     * @param out      The output stream where the encoded data is to be written.
     * @param fold     Controls whether separate sections of encoded data are separated by
     *                 linebreaks or whitespace.
     *
     * @exception IOException
     */
    public void encodeWord(InputStream in, String charset, String specials, OutputStream out, boolean fold) throws IOException
    {
        // we need to scan ahead in a few places, which may require pushing characters back on to the stream.
        // make sure we have a stream where this is possible.
        PushbackInputStream inStream = new PushbackInputStream(in);
        PrintStream writer = new PrintStream(out);

        // segments of encoded data are limited to 76 bytes, including the control sections
        // ("=?", "?Q?" and "?=" account for the 7).
        // NOTE(review): RFC 2047 caps an encoded-word at 75 characters; this budget allows 76 --
        // confirm before changing.
        int limit = 76 - 7 - charset.length();
        boolean firstLine = true;
        StringBuffer encodedString = new StringBuffer(76);

        while (true) {

            // encode another segment of data.
            encode(inStream, encodedString, specials, limit);
            // nothing encoded means we've hit the end of the data.
            if (encodedString.length() == 0) {
                break;
            }
            // if we have more than one segment, we need to insert separators.  Depending on whether folding
            // was requested, this is either a blank or a linebreak.
            if (!firstLine) {
                if (fold) {
                    writer.print("\r\n");
                }
                else {
                    writer.print(" ");
                }
            }

            // add the encoded word header
            writer.print("=?");
            writer.print(charset);
            writer.print("?Q?");
            // the data
            writer.print(encodedString.toString());
            // and the terminator mark
            writer.print("?=");
            writer.flush();

            // we reset the string buffer and reuse it.
            encodedString.setLength(0);
            // every segment after this one needs a separator in front of it.
            firstLine = false;
        }
    }
}