/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.geronimo.mail.util;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.PushbackInputStream;
import java.io.UnsupportedEncodingException;

/**
 * Encoder/decoder for the quoted-printable content transfer encoding and for
 * the "Q" flavour of RFC 2047 encoded words.
 *
 * An instance keeps per-stream state in mutable fields (current line position,
 * deferred blanks and carriage returns, look-ahead characters), so the
 * incremental encode/decode calls for one stream must all go through the
 * same instance.
 */
public class QuotedPrintableEncoder implements Encoder {

    // hex digits used when writing an "=XX" escape.
    protected static final byte[] encodingTable =
        {
            (byte)'0', (byte)'1', (byte)'2', (byte)'3', (byte)'4', (byte)'5', (byte)'6', (byte)'7',
            (byte)'8', (byte)'9', (byte)'A', (byte)'B', (byte)'C', (byte)'D', (byte)'E', (byte)'F'
        };

    /*
     * reverse mapping of encodingTable (hex digit character -> value).  Entries for
     * characters that are not upper case hex digits are left at zero, so this table
     * cannot be used to validate input; the decoders in this class use hexValue().
     */
    protected static final byte[] decodingTable = new byte[128];

    static {
        // initialize the decoding table
        for (int i = 0; i < encodingTable.length; i++) {
            decodingTable[encodingTable[i]] = (byte)i;
        }
    }

    // default number of characters we will write per line.
    private static final int DEFAULT_CHARS_PER_LINE = 76;

    // marker returned by decodeEscape() when the escape was a soft line break.
    private static final int SOFT_LINE_BREAK = -1;

    // the output stream we're wrapped around
    protected OutputStream out;
    // the number of bytes written;
    protected int bytesWritten = 0;
    // number of bytes written on the current line
    protected int lineCount = 0;
    // line length we're dealing with
    protected int lineLength;
    // number of deferred whitespace characters in decode mode.
    protected int deferredWhitespace = 0;
    // look-ahead character read while scanning a run of blanks in decode(InputStream).
    // This is the raw (still encoded) character; -1 means nothing is cached.
    protected int cachedCharacter = -1;

    // indicates whether the last character was a '\r', potentially part of a CRLF sequence.
    protected boolean lastCR = false;
    // remember whether last character was a white space.
    protected boolean lastWhitespace = false;

    /**
     * Create an encoder with no target stream and the default line length.
     */
    public QuotedPrintableEncoder() {
        this(null, DEFAULT_CHARS_PER_LINE);
    }

    /**
     * Create an encoder writing to the given stream with the default line length.
     *
     * @param out    The stream encoded data is written to.
     */
    public QuotedPrintableEncoder(OutputStream out) {
        this(out, DEFAULT_CHARS_PER_LINE);
    }

    /**
     * Create an encoder writing to the given stream.
     *
     * @param out        The stream encoded data is written to.
     * @param lineLength The maximum length of an encoded line.
     */
    public QuotedPrintableEncoder(OutputStream out, int lineLength) {
        this.out = out;
        this.lineLength = lineLength;
    }

    /**
     * Resolve a blank or carriage return whose handling was deferred until
     * the following character was known.
     *
     * @param ch     The character that follows the deferred one.
     *
     * @exception IOException
     */
    private void checkDeferred(int ch) throws IOException {
        // was the last character we looked at a whitespace?  Try to decide what to do with it now.
        if (lastWhitespace) {
            // if this whitespace is at the end of the line, write it out encoded
            if (ch == '\r' || ch == '\n') {
                writeEncodedCharacter(' ');
            }
            else {
                // we can write this out without encoding.
                writeCharacter(' ');
            }
            // we always turn this off.
            lastWhitespace = false;
        }
        // deferred carriage return?
        else if (lastCR) {
            // if the char following the CR was not a new line, write an EOL now.
            if (ch != '\n') {
                writeEOL();
            }
            // we always turn this off too
            lastCR = false;
        }
    }

    /**
     * Convert a single hex digit character into its numeric value.  Both
     * upper and lower case digits are accepted.
     *
     * @param ch     The candidate hex digit (a byte or character value).
     *
     * @return The value of the digit, 0-15.
     * @exception IOException
     *                   if the character is not a hex digit.
     */
    private static int hexValue(int ch) throws IOException {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'A' && ch <= 'F') {
            return ch - 'A' + 10;
        }
        if (ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        }
        throw new IOException("Invalid quoted printable encoding");
    }

    /**
     * Interpret the characters that follow an '=' marker in a byte array.
     * Nothing is consumed; the caller advances its offset by one for a
     * bare-LF soft break and by two in every other case.
     *
     * @param data      The encoded data.
     * @param off       The offset of the first character after the '='.
     * @param endOffset The offset just past the last usable character.
     *
     * @return The decoded byte value (0-255), or SOFT_LINE_BREAK.  Whenever this
     *         method returns normally, data[off] is a valid position.
     * @exception IOException
     *                   if the data is truncated or is not a valid escape.
     */
    private static int decodeEscape(byte[] data, int off, int endOffset) throws IOException {
        // "=" followed directly by a linefeed is a soft break whose CR was lost in transit.
        if (off < endOffset && data[off] == '\n') {
            return SOFT_LINE_BREAK;
        }
        // everything else needs two characters to work with.
        if (off + 1 >= endOffset) {
            throw new IOException("Invalid quoted printable encoding");
        }
        byte b1 = data[off];
        byte b2 = data[off + 1];

        // an escaped carriage return must be followed by a newline; the pair is a soft
        // linebreak inserted by the encoding and is tossed away on decode.
        if (b1 == '\r') {
            if (b2 != '\n') {
                throw new IOException("Invalid quoted printable encoding");
            }
            return SOFT_LINE_BREAK;
        }
        // a hex pair we need to convert back to a single byte.
        return (hexValue(b1) << 4) | hexValue(b2);
    }


    /**
     * encode the input data producing a quoted-printable output stream.
     *
     * @param data   The array of byte data.
     * @param off    The starting offset within the data.
     * @param length Length of the data to encode.
     *
     * @return the running count of bytes written by this encoder.
     */
    public int encode(byte[] data, int off, int length) throws IOException {
        int endOffset = off + length;

        while (off < endOffset) {
            // handle the encoding of each character in turn.
            encode(data[off++]);
        }

        return bytesWritten;
    }


    /**
     * Encode a single byte of data.  Blanks and carriage returns are not
     * written immediately; they are resolved when the next character is
     * encoded.
     *
     * @param ch     The byte to encode (only the low order 8 bits are used).
     *
     * @exception IOException
     */
    public void encode(int ch) throws IOException {
        // make sure this is just a single byte value.
        ch = ch & 0xFF;

        // see if we had to defer handling of a whitespace or '\r' character, and handle it if necessary.
        checkDeferred(ch);
        // different characters require special handling.
        switch (ch) {
            // spaces require special handling.  If the next character is a line terminator, then
            // the space needs to be encoded.
            case ' ':
            {
                // at this point, we don't know whether this needs encoding or not.  If the next
                // character is a linend, it gets encoded.  If anything else, we just write it as is.
                lastWhitespace = true;
                // turn off any CR flags.
                lastCR = false;
                break;
            }

            // carriage return, which may be part of a CRLF sequence.
            case '\r':
            {
                // just flag this until we see the next character.
                lastCR = true;
                break;
            }

            // a new line character...we need to check to see if it was paired up with a '\r' char.
            case '\n':
            {
                // we always write this out for a newline.  We defer CRs until we see if the LF follows.
                writeEOL();
                break;
            }

            // an '=' is the escape character for an encoded character, so it must also
            // be written encoded.
            case '=':
            {
                writeEncodedCharacter(ch);
                break;
            }

            // all other characters.  If outside the printable character range, write it encoded.
            default:
            {
                if (ch < 32 || ch >= 127) {
                    writeEncodedCharacter(ch);
                }
                else {
                    writeCharacter(ch);
                }
                break;
            }
        }
    }


    /**
     * encode the input data using the RFC 2047 "Q" encoding rules.
     *
     * @param data     The array of byte data.
     * @param off      The starting offset within the data.
     * @param length   Length of the data to encode.
     * @param specials The additional characters that must be written encoded.
     *
     * @return the running count of bytes written by this encoder.
     */
    public int encode(byte[] data, int off, int length, String specials) throws IOException {
        int endOffset = off + length;

        while (off < endOffset) {
            // handle the encoding of each character in turn.
            encode(data[off++], specials);
        }

        return bytesWritten;
    }


    /**
     * Encode one segment of RFC 2047 "Q" data, reading from a stream and
     * appending to a string buffer, until either the input is exhausted or
     * the segment is full.
     *
     * @param in       The source of the data.  A character that does not fit
     *                 in the segment is pushed back on to this stream.
     * @param out      The buffer the encoded characters are appended to.
     * @param specials The additional characters that must be written encoded.
     * @param limit    The maximum number of encoded characters to produce.
     *
     * @return the number of encoded characters appended.
     */
    public int encode(PushbackInputStream in, StringBuffer out, String specials, int limit) throws IOException {
        int count = 0;

        while (count < limit) {
            int ch = in.read();

            if (ch == -1) {
                return count;
            }
            // make sure this is just a single byte value.
            ch = ch & 0xFF;

            if (ch == ' ') {
                // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
                out.append('_');
                count++;
            }
            // non-ascii chars and the designated specials all get encoded.
            else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
                // we need at least 3 characters to write this out, so we need to
                // forget we saw this one and try in the next segment.
                if (count + 3 > limit) {
                    in.unread(ch);
                    return count;
                }
                out.append('=');
                out.append((char)encodingTable[ch >> 4]);
                out.append((char)encodingTable[ch & 0x0F]);
                count += 3;
            }
            else {
                // good character, just use unchanged.
                out.append((char)ch);
                count++;
            }
        }
        return count;
    }


    /**
     * Specialized version of the encoder that handles encoding of
     * RFC 2047 encoded word values.  This has special handling for
     * certain characters, but less special handling for blanks and
     * linebreaks.
     *
     * @param ch       The byte to encode (only the low order 8 bits are used).
     * @param specials The additional characters that must be written encoded.
     *
     * @exception IOException
     */
    public void encode(int ch, String specials) throws IOException {
        // make sure this is just a single byte value.
        ch = ch & 0xFF;

        if (ch == ' ') {
            // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
            writeCharacter('_');
        }
        // non-ascii chars and the designated specials all get encoded.
        else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
            writeEncodedCharacter(ch);
        }
        else {
            // good character, just use unchanged.
            writeCharacter(ch);
        }
    }


    /**
     * encode the input data producing a quoted-printable output stream.
     *
     * @param data   The array of byte data.
     * @param off    The starting offset within the data.
     * @param length Length of the data to encode.
     * @param out    The output stream the encoded data is written to.  This
     *               becomes the encoder's target stream.
     *
     * @return the number of bytes produced.
     */
    public int encode(byte[] data, int off, int length, OutputStream out) throws IOException {
        // make sure we're writing to the correct stream
        this.out = out;
        bytesWritten = 0;

        // do the actual encoding
        return encode(data, off, length);
    }


    /**
     * decode the quoted-printable byte data writing it to the given output stream.
     * Blanks at the end of a line (or at the end of the data) are discarded
     * and soft line breaks are removed.
     *
     * @param data   The array of byte data to decode.
     * @param off    Starting offset within the array.
     * @param length The length of data to decode.
     * @param out    The output stream used to return the decoded data.
     *
     * @return the number of bytes produced.
     * @exception IOException
     *                   if the data contains a truncated or invalid escape.
     */
    public int decode(byte[] data, int off, int length, OutputStream out) throws IOException {
        // make sure we're writing to the correct stream
        this.out = out;

        int endOffset = off + length;
        int decodedCount = 0;

        while (off < endOffset) {
            byte ch = data[off++];

            // space characters are a pain.  We need to scan ahead until we find a non-space character.
            // if the character is a line terminator, we need to discard the blanks.
            if (ch == ' ') {
                int trailingSpaces = 1;
                // scan forward, counting the characters.
                while (off < endOffset && data[off] == ' ') {
                    off++;
                    trailingSpaces++;
                }
                // is this a lineend at the current location?  Then the blanks just vanish.
                if (off >= endOffset || data[off] == '\r' || data[off] == '\n') {
                    continue;
                }
                // make sure we account for the spaces in the output count.
                decodedCount += trailingSpaces;
                // write out the blank characters we counted and continue with the non-blank.
                while (trailingSpaces-- > 0) {
                    out.write(' ');
                }
            }
            else if (ch == '=') {
                // we found an encoded character or a soft line break.
                int decoded = decodeEscape(data, off, endOffset);
                if (decoded == SOFT_LINE_BREAK) {
                    // nothing is written; step over LF or CRLF.
                    off += (data[off] == '\n') ? 1 : 2;
                }
                else {
                    // 3 bytes in, one byte out
                    off += 2;
                    out.write(decoded);
                    decodedCount++;
                }
            }
            else {
                // simple character, just write it out.
                out.write(ch);
                decodedCount++;
            }
        }

        return decodedCount;
    }

    /**
     * Decode a byte array of RFC 2047 "Q" encoded word data.
     *
     * @param data   The data array.
     * @param out    The output stream target for the decoded data.
     *
     * @return The number of bytes written to the stream.
     * @exception IOException
     */
    public int decodeWord(byte[] data, OutputStream out) throws IOException {
        return decodeWord(data, 0, data.length, out);
    }


    /**
     * decode RFC 2047 "Q" encoded word data writing it to the given output stream.
     * Underscores are translated back into blanks.
     *
     * @param data   The array of byte data to decode.
     * @param off    Starting offset within the array.
     * @param length The length of data to decode.
     * @param out    The output stream used to return the decoded data.
     *
     * @return the number of bytes produced.
     * @exception IOException
     *                   if the data contains a truncated or invalid escape.
     */
    public int decodeWord(byte[] data, int off, int length, OutputStream out) throws IOException {
        // make sure we're writing to the correct stream
        this.out = out;

        int endOffset = off + length;
        int decodedCount = 0;

        while (off < endOffset) {
            byte ch = data[off++];

            // space characters were translated to '_' on encode, so we need to translate them back.
            if (ch == '_') {
                out.write(' ');
                // the blank is a real output byte and has to be counted like any other.
                decodedCount++;
            }
            else if (ch == '=') {
                // we found an encoded character or a soft line break.
                int decoded = decodeEscape(data, off, endOffset);
                if (decoded == SOFT_LINE_BREAK) {
                    // nothing is written; step over LF or CRLF.
                    off += (data[off] == '\n') ? 1 : 2;
                }
                else {
                    // 3 bytes in, one byte out
                    off += 2;
                    out.write(decoded);
                    decodedCount++;
                }
            }
            else {
                // simple character, just write it out.
                out.write(ch);
                decodedCount++;
            }
        }

        return decodedCount;
    }


    /**
     * decode the quoted-printable String data writing it to the given output stream.
     *
     * @param data   The String data to decode.
     * @param out    The output stream to write the decoded data to.
     *
     * @return the number of bytes produced.
     * @exception IOException
     */
    public int decode(String data, OutputStream out) throws IOException {
        try {
            // just get the byte data and decode.
            byte[] bytes = data.getBytes("US-ASCII");
            return decode(bytes, 0, bytes.length, out);
        } catch (UnsupportedEncodingException e) {
            // keep the original failure attached so it is not lost to the caller.
            IOException failure = new IOException("Invalid UUEncoding");
            failure.initCause(e);
            throw failure;
        }
    }

    /**
     * Insert a soft line break if the requested number of characters would
     * not fit on the current line (leaving room for the '=' of the break).
     *
     * @param required The number of characters about to be written.
     *
     * @exception IOException
     */
    private void checkLineLength(int required) throws IOException {
        // if we're at our line length limit, write out a soft line break and reset.
        if ((lineCount + required) >= lineLength) {
            out.write('=');
            out.write('\r');
            out.write('\n');
            bytesWritten += 3;
            lineCount = 0;
        }
    }


    /**
     * Write a character in its escaped "=XX" form.
     *
     * @param ch     The character value to write (expected to be 0-255).
     *
     * @exception IOException
     */
    public void writeEncodedCharacter(int ch) throws IOException {
        // we need 3 characters for an encoded value
        checkLineLength(3);
        out.write('=');
        out.write(encodingTable[ch >> 4]);
        out.write(encodingTable[ch & 0x0F]);
        lineCount += 3;
        bytesWritten += 3;
    }


    /**
     * Write a character unchanged.
     *
     * @param ch     The character to write.
     *
     * @exception IOException
     */
    public void writeCharacter(int ch) throws IOException {
        // a literal character only needs a single position on the line
        checkLineLength(1);
        out.write(ch);
        lineCount++;
        bytesWritten++;
    }


    /**
     * Write a hard line break (CRLF) and start a new line.
     *
     * @exception IOException
     */
    public void writeEOL() throws IOException {
        out.write('\r');
        out.write('\n');
        lineCount = 0;
        // exactly two bytes (CR and LF) were written.
        bytesWritten += 2;
    }


    /**
     * Decode the next byte of quoted-printable data from a stream.  Blanks
     * at the end of a line (or at the end of the data) are discarded and
     * soft line breaks are removed.
     *
     * @param in     The stream the encoded data is read from.
     *
     * @return The next decoded byte, or -1 at the end of the data.
     * @exception IOException
     *                   if the data contains a truncated or invalid escape.
     */
    public int decode(InputStream in) throws IOException {

        // we potentially need to scan over spans of whitespace characters to determine if they're real
        // we just return blanks until the count goes to zero.
        if (deferredWhitespace > 0) {
            deferredWhitespace--;
            return ' ';
        }

        // we may have needed to scan ahead to find the first non-blank character, which we would store here.
        // hand that back once we're done with the blanks.  It is decoded only now, with all of the
        // look-ahead state cleared, because decoding a soft line break re-enters this method and
        // must not see (or have overwritten) the state of the scan that cached the character.
        if (cachedCharacter != -1) {
            int pending = cachedCharacter;
            cachedCharacter = -1;
            return decodeNonspaceChar(in, pending);
        }

        int ch = in.read();

        // reflect back an EOF condition.
        if (ch == -1) {
            return -1;
        }

        // space characters are a pain.  We need to scan ahead until we find a non-space character.
        // if the character is a line terminator, we need to discard the blanks.
        if (ch == ' ') {
            // scan forward, counting the characters.
            while ((ch = in.read()) == ' ') {
                deferredWhitespace++;
            }

            // is this a lineend at the current location?
            if (ch == -1 || ch == '\r' || ch == '\n') {
                // those blanks we so zealously counted up don't really exist.  Clear out the counter.
                deferredWhitespace = 0;
                // return the real significant character now.
                return ch;
            }
            // remember this character for later, after we've used up the deferred blanks.
            cachedCharacter = ch;
            // return this space.  We did not include this one in the deferred count, so we're right in sync.
            return ' ';
        }
        return decodeNonspaceChar(in, ch);
    }

    /**
     * Decode a character that is known not to be a blank, reading the rest
     * of an escape sequence from the stream when the character is '='.
     *
     * @param in     The stream the encoded data is read from.
     * @param ch     The character already read from the stream.
     *
     * @return The decoded byte.  After a soft line break this is whatever
     *         decode() returns for the data that follows (possibly -1).
     * @exception IOException
     *                   if the escape is truncated or invalid.
     */
    private int decodeNonspaceChar(InputStream in, int ch) throws IOException {
        if (ch != '=') {
            return ch;
        }

        int b1 = in.read();
        // we need to get two characters after the quotation marker
        if (b1 == -1) {
            throw new IOException("Truncated quoted printable data");
        }
        // "=" followed directly by a linefeed is a soft break whose CR was lost in transit.
        if (b1 == '\n') {
            return decode(in);
        }
        int b2 = in.read();
        if (b2 == -1) {
            throw new IOException("Truncated quoted printable data");
        }

        // we've found an encoded carriage return.  The next char needs to be a newline
        if (b1 == '\r') {
            if (b2 != '\n') {
                throw new IOException("Invalid quoted printable encoding");
            }
            // this was a soft linebreak inserted by the encoding.  We just toss this away
            // on decode.  We need to return something, so recurse and decode the next.
            return decode(in);
        }
        // this is a hex pair we need to convert back to a single byte.
        return (hexValue(b1) << 4) | hexValue(b2);
    }


    /**
     * Perform RFC-2047 word encoding using Q-P data encoding.
     *
     * @param in       The source for the data to encode.
     * @param charset  The charset tag to be added to each encoded data section.
     * @param specials The set of special characters that we require to encoded.
     * @param out      The output stream where the encoded data is to be written.
     * @param fold     Controls whether separate sections of encoded data are separated by
     *                 linebreaks or whitespace.
     *
     * @exception IOException
     */
    public void encodeWord(InputStream in, String charset, String specials, OutputStream out, boolean fold) throws IOException
    {
        // we need to scan ahead in a few places, which may require pushing characters back on to the stream.
        // make sure we have a stream where this is possible.
        PushbackInputStream inStream = new PushbackInputStream(in);
        PrintStream writer = new PrintStream(out);

        // segments of encoded data are limited to 75 bytes, including the control sections.
        int limit = 75 - 7 - charset.length();
        boolean firstLine = true;
        StringBuffer encodedString = new StringBuffer(76);

        while (true) {

            // encode another segment of data.
            encode(inStream, encodedString, specials, limit);
            // nothing encoded means we've hit the end of the data.
            if (encodedString.length() == 0) {
                break;
            }
            // if we have more than one segment, we need to insert separators.  Depending on whether folding
            // was requested, this is either a blank or a linebreak.
            if (!firstLine) {
                if (fold) {
                    writer.print("\r\n");
                }
                else {
                    writer.print(" ");
                }
            }

            // add the encoded word header
            writer.print("=?");
            writer.print(charset);
            writer.print("?Q?");
            // the data
            writer.print(encodedString.toString());
            // and the terminator mark
            writer.print("?=");
            writer.flush();

            // we reset the string buffer and reuse it.
            encodedString.setLength(0);
            // we need a delimiter between sections from this point on.
            firstLine = false;
        }
    }


    /**
     * Perform RFC-2047 word encoding using Q-P data encoding, producing a
     * single encoded word.
     *
     * @param data     The data to encode.
     * @param out      The buffer the encoded word is appended to.
     * @param charset  The charset tag to be added to the encoded word.
     * @param specials The set of special characters that we require to encoded.
     *
     * @exception IOException
     */
    public void encodeWord(byte[] data, StringBuffer out, String charset, String specials) throws IOException
    {
        // append the word header
        out.append("=?");
        out.append(charset);
        out.append("?Q?");
        // add on the encoded data
        encodeWordData(data, out, specials);
        // the end of the encoding marker
        out.append("?=");
    }


    /**
     * Encode the data portion of an RFC-2047 encoded word using Q-P data
     * encoding.  No header or terminator is added.
     *
     * @param data     The data to encode.
     * @param out      The buffer the encoded data is appended to.
     * @param specials The set of special characters that we require to encoded.
     *
     * @exception IOException
     */
    public void encodeWordData(byte[] data, StringBuffer out, String specials) throws IOException {
        for (int i = 0; i < data.length; i++) {
            // make sure this is just a single byte value.
            int ch = data[i] & 0xff;

            if (ch == ' ') {
                // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
                out.append('_');
            }
            // non-ascii chars and the designated specials all get encoded.
            else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
                out.append('=');
                out.append((char)encodingTable[ch >> 4]);
                out.append((char)encodingTable[ch & 0x0F]);
            }
            else {
                // good character, just use unchanged.
                out.append((char)ch);
            }
        }
    }


    /**
     * Estimate the final encoded size of a segment of data.
     * This is used to ensure that the encoded blocks do
     * not get split across a unicode character boundary and
     * that the encoding will fit within the bounds of
     * a mail header line.
     *
     * @param data     The data we're anticipating encoding.
     * @param specials The set of special characters that we require to encoded.
     *
     * @return The size of the byte data in encoded form.
     */
    public int estimateEncodedLength(byte[] data, String specials)
    {
        int count = 0;

        for (int i = 0; i < data.length; i++) {
            // make sure this is just a single byte value.
            int ch = data[i] & 0xff;

            // a blank always becomes a single underscore, whatever the specials say.
            if (ch == ' ') {
                count++;
            }
            // non-ascii chars and the designated specials all get encoded.
            else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
                // Q encoding translates a single char into 3 characters
                count += 3;
            }
            else {
                // non-encoded character
                count++;
            }
        }
        return count;
    }
}