/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.geronimo.mail.util;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.PushbackInputStream;
import java.io.UnsupportedEncodingException;

/**
 * Encoder/decoder for the quoted-printable content transfer encoding and
 * for the "Q" encoding used in RFC 2047 encoded words.
 *
 * <p>An instance carries mutable state (the target stream, line and byte
 * counters, and deferred-character flags), so the streaming
 * {@code encode(...)} and {@code decode(InputStream)} calls depend on the
 * calls that preceded them on the same instance.</p>
 */
public class QuotedPrintableEncoder implements Encoder {

    // hex digits used when writing an "=XX" escape sequence.
    static protected final byte[] encodingTable =
        {
            (byte)'0', (byte)'1', (byte)'2', (byte)'3', (byte)'4', (byte)'5', (byte)'6', (byte)'7',
            (byte)'8', (byte)'9', (byte)'A', (byte)'B', (byte)'C', (byte)'D', (byte)'E', (byte)'F'
        };

    /*
     * set up the decoding table.  Indexed by ASCII character; holds the value
     * of that hex digit.  Entries for non-hex characters are left at zero.
     */
    static protected final byte[] decodingTable = new byte[128];

    static {
        // initialize the decoding table from the (uppercase) encoding table
        for (int i = 0; i < encodingTable.length; i++)
        {
            decodingTable[encodingTable[i]] = (byte)i;
        }
        // also map the lowercase hex digits so that lowercase escapes decode correctly.
        for (int i = 10; i < 16; i++)
        {
            decodingTable['a' + (i - 10)] = (byte)i;
        }
    }


    // default number of characters we will write per line.
    static private final int DEFAULT_CHARS_PER_LINE = 76;

    // the output stream we're wrapped around
    protected OutputStream out;
    // the number of bytes written;
    protected int bytesWritten = 0;
    // number of bytes written on the current line
    protected int lineCount = 0;
    // line length we're dealing with
    protected int lineLength;
    // number of deferred whitespace characters in decode mode.
    protected int deferredWhitespace = 0;

    // a decoded character read ahead while scanning blanks in decode mode; -1 when empty.
    protected int cachedCharacter = -1;

    // indicates whether the last character was a '\r', potentially part of a CRLF sequence.
    protected boolean lastCR = false;
    // remember whether last character was a white space.
    protected boolean lastWhitespace = false;

    /**
     * Create an encoder with no output stream and the default line length.
     * A stream must be supplied by a later call that takes one.
     */
    public QuotedPrintableEncoder() {
        this(null, DEFAULT_CHARS_PER_LINE);
    }

    /**
     * Create an encoder writing to the given stream with the default line length.
     *
     * @param out    The stream that receives encoded data.
     */
    public QuotedPrintableEncoder(OutputStream out) {
        this(out, DEFAULT_CHARS_PER_LINE);
    }

    /**
     * Create an encoder writing to the given stream.
     *
     * @param out        The stream that receives encoded data.
     * @param lineLength The line length used when deciding where to insert soft line breaks.
     */
    public QuotedPrintableEncoder(OutputStream out, int lineLength) {
        this.out = out;
        this.lineLength = lineLength;
    }

    /**
     * Convert a single hex digit character into its numeric value.
     *
     * @param ch     The character following an '=' escape marker.  May be a
     *               sign-extended byte or any int; only ASCII hex digits
     *               (either case) are accepted.
     *
     * @return The digit value in the range 0-15.
     * @exception IOException if the character is not a hex digit.
     */
    private static int decodeHexDigit(int ch) throws IOException {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'A' && ch <= 'F') {
            return ch - 'A' + 10;
        }
        if (ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        }
        throw new IOException("Invalid quoted printable encoding");
    }

    /**
     * Resolve a blank or carriage return whose handling was postponed until
     * the following character was known.
     *
     * @param ch     The character about to be encoded.
     */
    private void checkDeferred(int ch) throws IOException {
        // was the last character we looked at a whitespace?  Try to decide what to do with it now.
        if (lastWhitespace) {
            // if this whitespace is at the end of the line, write it out encoded
            if (ch == '\r' || ch == '\n') {
                writeEncodedCharacter(' ');
            }
            else {
                // we can write this out without encoding.
                writeCharacter(' ');
            }
            // we always turn this off.
            lastWhitespace = false;
        }
        // deferred carriage return?
        else if (lastCR) {
            // if the char following the CR was not a new line, write an EOL now.
            if (ch != '\n') {
                writeEOL();
            }
            // we always turn this off too
            lastCR = false;
        }
    }


    /**
     * Encode the input data as quoted-printable, writing to the current output stream.
     *
     * <p>A blank or carriage return at the very end of the data stays deferred
     * until a following character is encoded on this instance.</p>
     *
     * @param data   The array of byte data.
     * @param off    The starting offset within the data.
     * @param length Length of the data to encode.
     *
     * @return the running count of bytes written by this encoder.
     */
    public int encode(byte[] data, int off, int length) throws IOException {
        int endOffset = off + length;

        while (off < endOffset) {
            // get the character
            byte ch = data[off++];

            // handle the encoding of this character.
            encode(ch);
        }

        return bytesWritten;
    }


    /**
     * Encode a single byte value as quoted-printable.
     *
     * @param ch     The value to encode; only the low-order 8 bits are used.
     *
     * @exception IOException
     */
    public void encode(int ch) throws IOException {
        // make sure this is just a single byte value.
        ch = ch & 0xFF;

        // see if we had to defer handling of a whitespace or '\r' character, and handle it if necessary.
        checkDeferred(ch);
        // different characters require special handling.
        switch (ch) {
            // spaces require special handling.  If the next character is a line terminator, then
            // the space needs to be encoded.
            case ' ':
            {
                // at this point, we don't know whether this needs encoding or not.  If the next
                // character is a linend, it gets encoded.  If anything else, we just write it as is.
                lastWhitespace = true;
                // turn off any CR flags.
                lastCR = false;
                break;
            }

            // carriage return, which may be part of a CRLF sequence.
            case '\r':
            {
                // just flag this until we see the next character.
                lastCR = true;
                break;
            }

            // a new line character...we need to check to see if it was paired up with a '\r' char.
            case '\n':
            {
                // we always write this out for a newline.  We defer CRs until we see if the LF follows.
                writeEOL();
                break;
            }

            // an '=' is the escape character for an encoded character, so it must also
            // be written encoded.
            case '=':
            {
                writeEncodedCharacter(ch);
                break;
            }

            // all other characters.  If outside the printable character range, write it encoded.
            default:
            {
                if (ch < 32 || ch >= 127) {
                    writeEncodedCharacter(ch);
                }
                else {
                    writeCharacter(ch);
                }
                break;
            }
        }
    }


    /**
     * Encode the input data using the RFC 2047 "Q" rules, writing to the
     * current output stream.
     *
     * @param data     The array of byte data.
     * @param off      The starting offset within the data.
     * @param length   Length of the data to encode.
     * @param specials Characters that must be written in escaped form.
     *
     * @return the running count of bytes written by this encoder.
     */
    public int encode(byte[] data, int off, int length, String specials) throws IOException {
        int endOffset = off + length;

        while (off < endOffset) {
            // get the character
            byte ch = data[off++];

            // handle the encoding of this character.
            encode(ch, specials);
        }

        return bytesWritten;
    }


    /**
     * Encode one segment of "Q" encoded data, reading from a stream and
     * appending to a buffer, without exceeding a character limit.
     *
     * @param in       The source of the data.  A byte whose escape sequence
     *                 would not fit within the limit is pushed back on to this stream.
     * @param out      The buffer the encoded characters are appended to.
     * @param specials Characters that must be written in escaped form.
     * @param limit    The maximum number of characters to append.
     *
     * @return the number of characters appended to the buffer.
     */
    public int encode(PushbackInputStream in, StringBuffer out, String specials, int limit) throws IOException {
        int count = 0;

        while (count < limit) {
            int ch = in.read();

            if (ch == -1) {
                return count;
            }
            // make sure this is just a single byte value.
            ch = ch & 0xFF;

            if (ch == ' ') {
                // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
                out.append('_');
                count++;
            }
            // non-ascii chars and the designated specials all get encoded.
            else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
                // we need at least 3 characters to write this out, so we need to
                // forget we saw this one and try in the next segment.
                if (count + 3 > limit) {
                    in.unread(ch);
                    return count;
                }
                out.append('=');
                out.append((char)encodingTable[ch >> 4]);
                out.append((char)encodingTable[ch & 0x0F]);
                count += 3;
            }
            else {
                // good character, just use unchanged.
                out.append((char)ch);
                count++;
            }
        }
        return count;
    }


    /**
     * Specialized version of the encoder that handles encoding of
     * RFC 2047 encoded word values.  This has special handling for
     * certain characters, but less special handling for blanks and
     * linebreaks.
     *
     * @param ch       The value to encode; only the low-order 8 bits are used.
     * @param specials Characters that must be written in escaped form.
     *
     * @exception IOException
     */
    public void encode(int ch, String specials) throws IOException {
        // make sure this is just a single byte value.
        ch = ch & 0xFF;

        if (ch == ' ') {
            // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
            writeCharacter('_');
        }
        // non-ascii chars and the designated specials all get encoded.
        else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
            writeEncodedCharacter(ch);
        }
        else {
            // good character, just use unchanged.
            writeCharacter(ch);
        }
    }


    /**
     * Encode the input data as quoted-printable, writing to the given stream.
     * The stream becomes this encoder's output stream and the byte count is reset.
     *
     * @param data   The array of byte data.
     * @param off    The starting offset within the data.
     * @param length Length of the data to encode.
     * @param out    The output stream the encoded data is written to.
     *
     * @return the number of bytes produced.
     */
    public int encode(byte[] data, int off, int length, OutputStream out) throws IOException {
        // make sure we're writing to the correct stream
        this.out = out;
        bytesWritten = 0;

        // do the actual encoding
        return encode(data, off, length);
    }


    /**
     * Decode quoted-printable byte data, writing it to the given output stream.
     * Runs of blanks that are followed by a line terminator or by the end of
     * the data are discarded.
     *
     * @param data   The array of byte data to decode.
     * @param off    Starting offset within the array.
     * @param length The length of data to decode.
     * @param out    The output stream used to return the decoded data.
     *
     * @return the number of bytes produced.
     * @exception IOException if an escape sequence is truncated or is not a
     *                        hex pair or a soft line break.
     */
    public int decode(byte[] data, int off, int length, OutputStream out) throws IOException {
        // make sure we're writing to the correct stream
        this.out = out;

        int endOffset = off + length;
        int bytesWritten = 0;

        while (off < endOffset) {
            byte ch = data[off++];

            // space characters are a pain.  We need to scan ahead until we find a non-space character.
            // if the character is a line terminator, we need to discard the blanks.
            if (ch == ' ') {
                int trailingSpaces = 1;
                // scan forward, counting the characters.
                while (off < endOffset && data[off] == ' ') {
                    // step forward and count this.
                    off++;
                    trailingSpaces++;
                }
                // is this a lineend at the current location?
                if (off >= endOffset || data[off] == '\r' || data[off] == '\n') {
                    // go to the next one
                    continue;
                }
                else {
                    // make sure we account for the spaces in the output count.
                    bytesWritten += trailingSpaces;
                    // write out the blank characters we counted and continue with the non-blank.
                    while (trailingSpaces-- > 0) {
                        out.write(' ');
                    }
                }
            }
            else if (ch == '=') {
                // we found an encoded character.  Reduce the 3 char sequence to one.
                // but first, make sure we have two characters to work with.
                if (off + 1 >= endOffset) {
                    throw new IOException("Invalid quoted printable encoding");
                }
                // pick up the two characters following the escape marker.
                byte b1 = data[off++];
                byte b2 = data[off++];

                // we've found an encoded carriage return.  The next char needs to be a newline
                if (b1 == '\r') {
                    if (b2 != '\n') {
                        throw new IOException("Invalid quoted printable encoding");
                    }
                    // this was a soft linebreak inserted by the encoding.  We just toss this away
                    // on decode.
                }
                else {
                    // this is a hex pair we need to convert back to a single byte.  The digits
                    // are validated so bad input raises an IOException rather than indexing
                    // outside the decoding table or silently decoding as zero.
                    out.write((decodeHexDigit(b1) << 4) | decodeHexDigit(b2));
                    // 3 bytes in, one byte out
                    bytesWritten++;
                }
            }
            else {
                // simple character, just write it out.
                out.write(ch);
                bytesWritten++;
            }
        }

        return bytesWritten;
    }

    /**
     * Decode a byte array of "Q" encoded word data.
     *
     * @param data   The data array.
     * @param out    The output stream target for the decoded data.
     *
     * @return The number of bytes written to the stream.
     * @exception IOException
     */
    public int decodeWord(byte[] data, OutputStream out) throws IOException {
        return decodeWord(data, 0, data.length, out);
    }


    /**
     * Decode "Q" encoded word data, writing it to the given output stream.
     * Underscores are translated back into blanks.
     *
     * @param data   The array of byte data to decode.
     * @param off    Starting offset within the array.
     * @param length The length of data to decode.
     * @param out    The output stream used to return the decoded data.
     *
     * @return the number of bytes produced.
     * @exception IOException if an escape sequence is truncated or is not a
     *                        hex pair or a soft line break.
     */
    public int decodeWord(byte[] data, int off, int length, OutputStream out) throws IOException {
        // make sure we're writing to the correct stream
        this.out = out;

        int endOffset = off + length;
        int bytesWritten = 0;

        while (off < endOffset) {
            byte ch = data[off++];

            // space characters were translated to '_' on encode, so we need to translate them back.
            if (ch == '_') {
                out.write(' ');
                // the blank is real output, so it belongs in the count.
                bytesWritten++;
            }
            else if (ch == '=') {
                // we found an encoded character.  Reduce the 3 char sequence to one.
                // but first, make sure we have two characters to work with.
                if (off + 1 >= endOffset) {
                    throw new IOException("Invalid quoted printable encoding");
                }
                // pick up the two characters following the escape marker.
                byte b1 = data[off++];
                byte b2 = data[off++];

                // we've found an encoded carriage return.  The next char needs to be a newline
                if (b1 == '\r') {
                    if (b2 != '\n') {
                        throw new IOException("Invalid quoted printable encoding");
                    }
                    // this was a soft linebreak inserted by the encoding.  We just toss this away
                    // on decode.
                }
                else {
                    // this is a hex pair we need to convert back to a single byte.
                    out.write((decodeHexDigit(b1) << 4) | decodeHexDigit(b2));
                    // 3 bytes in, one byte out
                    bytesWritten++;
                }
            }
            else {
                // simple character, just write it out.
                out.write(ch);
                bytesWritten++;
            }
        }

        return bytesWritten;
    }


    /**
     * Decode quoted-printable String data, writing it to the given output stream.
     *
     * @param data   The String data to decode.
     * @param out    The output stream to write the decoded data to.
     *
     * @return the number of bytes produced.
     * @exception IOException
     */
    public int decode(String data, OutputStream out) throws IOException {
        try {
            // just get the byte data and decode.
            byte[] bytes = data.getBytes("US-ASCII");
            return decode(bytes, 0, bytes.length, out);
        } catch (UnsupportedEncodingException e) {
            // keep the original failure attached so it is not lost.
            IOException failure = new IOException("Invalid UUEncoding");
            failure.initCause(e);
            throw failure;
        }
    }

    /**
     * Insert a soft line break if writing the requested number of characters
     * would reach the line length limit.
     *
     * @param required The number of characters about to be written.
     */
    private void checkLineLength(int required) throws IOException {
        // if we're at our line length limit, write out a soft line break and reset.
        if ((lineCount + required) >= lineLength) {
            out.write('=');
            out.write('\r');
            out.write('\n');
            bytesWritten += 3;
            lineCount = 0;
        }
    }


    /**
     * Write a value as a three character "=XX" escape sequence.
     *
     * @param ch     The value to write; expected to be in the range 0-255.
     *
     * @exception IOException
     */
    public void writeEncodedCharacter(int ch) throws IOException {
        // we need 3 characters for an encoded value
        checkLineLength(3);
        out.write('=');
        out.write(encodingTable[ch >> 4]);
        out.write(encodingTable[ch & 0x0F]);
        lineCount += 3;
        bytesWritten += 3;
    }


    /**
     * Write a single character without escaping it.
     *
     * @param ch     The character to write.
     *
     * @exception IOException
     */
    public void writeCharacter(int ch) throws IOException {
        // an unencoded character needs just one position on the line
        checkLineLength(1);
        out.write(ch);
        lineCount++;
        bytesWritten++;
    }


    /**
     * Write a hard line break (CRLF) and start a new line.
     *
     * @exception IOException
     */
    public void writeEOL() throws IOException {
        out.write('\r');
        out.write('\n');
        lineCount = 0;
        // exactly two bytes (CR and LF) were written.
        bytesWritten += 2;
    }


    /**
     * Decode the next character from a quoted-printable input stream.
     *
     * @param in     The stream holding the encoded data.
     *
     * @return The next decoded value, or -1 at the end of the stream.
     * @exception IOException if an escape sequence is truncated or is not a
     *                        hex pair or a soft line break.
     */
    public int decode(InputStream in) throws IOException {

        // we potentially need to scan over spans of whitespace characters to determine if they're real
        // we just return blanks until the count goes to zero.
        if (deferredWhitespace > 0) {
            deferredWhitespace--;
            return ' ';
        }

        // we may have needed to scan ahead to find the first non-blank character, which we would store here.
        // hand that back once we're done with the blanks.
        if (cachedCharacter != -1) {
            int result = cachedCharacter;
            cachedCharacter = -1;
            return result;
        }

        int ch = in.read();

        // reflect back an EOF condition.
        if (ch == -1) {
            return -1;
        }

        // space characters are a pain.  We need to scan ahead until we find a non-space character.
        // if the character is a line terminator, we need to discard the blanks.
        if (ch == ' ') {
            // scan forward, counting the characters.
            while ((ch = in.read()) == ' ') {
                deferredWhitespace++;
            }

            // is this a lineend at the current location?
            if (ch == -1 || ch == '\r' || ch == '\n') {
                // those blanks we so zealously counted up don't really exist.  Clear out the counter.
                deferredWhitespace = 0;
                // return the real significant character now.
                return ch;
            }
            // remember this character for later, after we've used up the deferred blanks.
            cachedCharacter = decodeNonspaceChar(in, ch);
            // return this space.  We did not include this one in the deferred count, so we're right in sync.
            return ' ';
        }
        return decodeNonspaceChar(in, ch);
    }

    /**
     * Decode a character that is known not to be a blank, resolving an
     * "=XX" escape sequence or a soft line break if that is what it starts.
     *
     * @param in     The stream holding the rest of the encoded data.
     * @param ch     The character already read from the stream.
     *
     * @return The decoded value.
     */
    private int decodeNonspaceChar(InputStream in, int ch) throws IOException {
        if (ch == '=') {
            int b1 = in.read();
            // we need to get two characters after the quotation marker
            if (b1 == -1) {
                throw new IOException("Truncated quoted printable data");
            }
            int b2 = in.read();
            // we need to get two characters after the quotation marker
            if (b2 == -1) {
                throw new IOException("Truncated quoted printable data");
            }

            // we've found an encoded carriage return.  The next char needs to be a newline
            if (b1 == '\r') {
                if (b2 != '\n') {
                    throw new IOException("Invalid quoted printable encoding");
                }
                // this was a soft linebreak inserted by the encoding.  We just toss this away
                // on decode.  We need to return something, so recurse and decode the next.
                return decode(in);
            }
            else {
                // this is a hex pair we need to convert back to a single byte.  Stream values
                // can be as large as 255, so validate rather than index the table directly.
                return (decodeHexDigit(b1) << 4) | decodeHexDigit(b2);
            }
        }
        else {
            return ch;
        }
    }


    /**
     * Perform RFC-2047 word encoding using Q-P data encoding.
     *
     * @param in       The source for the encoded data.
     * @param charset  The charset tag to be added to each encoded data section.
     * @param specials The set of special characters that we require to encoded.
     * @param out      The output stream where the encoded data is to be written.
     * @param fold     Controls whether separate sections of encoded data are separated by
     *                 linebreaks or whitespace.
     *
     * @exception IOException
     */
    public void encodeWord(InputStream in, String charset, String specials, OutputStream out, boolean fold) throws IOException
    {
        // we need to scan ahead in a few places, which may require pushing characters back on to the stream.
        // make sure we have a stream where this is possible.
        PushbackInputStream inStream = new PushbackInputStream(in);
        PrintStream writer = new PrintStream(out);

        // segments of encoded data are limited to 75 bytes, including the control sections.
        int limit = 75 - 7 - charset.length();
        boolean firstLine = true;
        StringBuffer encodedString = new StringBuffer(76);

        while (true) {

            // encode another segment of data.
            encode(inStream, encodedString, specials, limit);
            // nothing encoded means we've hit the end of the data.
            if (encodedString.length() == 0) {
                break;
            }
            // if we have more than one segment, we need to insert separators.  Depending on whether folding
            // was requested, this is either a blank or a linebreak.
            if (!firstLine) {
                if (fold) {
                    writer.print("\r\n");
                }
                else {
                    writer.print(" ");
                }
            }

            // add the encoded word header
            writer.print("=?");
            writer.print(charset);
            writer.print("?Q?");
            // the data
            writer.print(encodedString.toString());
            // and the terminator mark
            writer.print("?=");
            writer.flush();

            // we reset the string buffer and reuse it.
            encodedString.setLength(0);
            // we need a delimiter between sections from this point on.
            firstLine = false;
        }
    }


    /**
     * Perform RFC-2047 word encoding using Q-P data encoding, producing a
     * single encoded word.
     *
     * @param data     The data to encode.
     * @param out      The buffer the encoded word is appended to.
     * @param charset  The charset tag to be added to the encoded word.
     * @param specials The set of special characters that we require to encoded.
     *
     * @exception IOException
     */
    public void encodeWord(byte[] data, StringBuffer out, String charset, String specials) throws IOException
    {
        // append the word header
        out.append("=?");
        out.append(charset);
        out.append("?Q?");
        // add on the encoded data
        encodeWordData(data, out, specials);
        // the end of the encoding marker
        out.append("?=");
    }


    /**
     * Encode the data portion of an RFC-2047 encoded word using Q-P data
     * encoding.  No header or terminator is added.
     *
     * @param data     The data to encode.
     * @param out      The buffer the encoded data is appended to.
     * @param specials The set of special characters that we require to encoded.
     *
     * @exception IOException
     */
    public void encodeWordData(byte[] data, StringBuffer out, String specials) throws IOException {
        for (int i = 0; i < data.length; i++) {
            int ch = data[i] & 0xff;

            if (ch == ' ') {
                // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
                out.append('_');
            }
            // non-ascii chars and the designated specials all get encoded.
            else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
                out.append('=');
                out.append((char)encodingTable[ch >> 4]);
                out.append((char)encodingTable[ch & 0x0F]);
            }
            else {
                // good character, just use unchanged.
                out.append((char)ch);
            }
        }
    }


    /**
     * Estimate the final encoded size of a segment of data.
     * This is used to ensure that the encoded blocks do
     * not get split across a unicode character boundary and
     * that the encoding will fit within the bounds of
     * a mail header line.
     *
     * @param data     The data we're anticipating encoding.
     * @param specials The set of special characters that we require to encoded.
     *
     * @return The size of the byte data in encoded form.
     */
    public int estimateEncodedLength(byte[] data, String specials)
    {
        int count = 0;

        for (int i = 0; i < data.length; i++) {
            // make sure this is just a single byte value.
            int ch = data[i] & 0xff;

            // non-ascii chars and the designated specials all get encoded.
            if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
                // Q encoding translates a single char into 3 characters
                count += 3;
            }
            else {
                // non-encoded character (a blank becomes a single underscore)
                count++;
            }
        }
        return count;
    }
}