1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20 package org.apache.geronimo.mail.util;
21
22 import java.io.EOFException;
23 import java.io.IOException;
24 import java.io.InputStream;
25 import java.io.OutputStream;
26 import java.io.PrintStream;
27 import java.io.PushbackInputStream;
28 import java.io.UnsupportedEncodingException;
29
30 public class QuotedPrintableEncoder implements Encoder {
31
32 static protected final byte[] encodingTable =
33 {
34 (byte)'0', (byte)'1', (byte)'2', (byte)'3', (byte)'4', (byte)'5', (byte)'6', (byte)'7',
35 (byte)'8', (byte)'9', (byte)'A', (byte)'B', (byte)'C', (byte)'D', (byte)'E', (byte)'F'
36 };
37
38 /*
39 * set up the decoding table.
40 */
41 static protected final byte[] decodingTable = new byte[128];
42
43 static {
44 // initialize the decoding table
45 for (int i = 0; i < encodingTable.length; i++)
46 {
47 decodingTable[encodingTable[i]] = (byte)i;
48 }
49 }
50
51
52 // default number of characters we will write per line.
53 static private final int DEFAULT_CHARS_PER_LINE = 76;
54
55 // the output stream we're wrapped around
56 protected OutputStream out;
57 // the number of bytes written;
58 protected int bytesWritten = 0;
59 // number of bytes written on the current line
60 protected int lineCount = 0;
61 // line length we're dealing with
62 protected int lineLength;
63 // number of deferred whitespace characters in decode mode.
64 protected int deferredWhitespace = 0;
65
66 protected int cachedCharacter = -1;
67
68 // indicates whether the last character was a '\r', potentially part of a CRLF sequence.
69 protected boolean lastCR = false;
70 // remember whether last character was a white space.
71 protected boolean lastWhitespace = false;
72
73 public QuotedPrintableEncoder() {
74 this(null, DEFAULT_CHARS_PER_LINE);
75 }
76
77 public QuotedPrintableEncoder(OutputStream out) {
78 this(out, DEFAULT_CHARS_PER_LINE);
79 }
80
81 public QuotedPrintableEncoder(OutputStream out, int lineLength) {
82 this.out = out;
83 this.lineLength = lineLength;
84 }
85
86 private void checkDeferred(int ch) throws IOException {
87 // was the last character we looked at a whitespace? Try to decide what to do with it now.
88 if (lastWhitespace) {
89 // if this whitespace is at the end of the line, write it out encoded
90 if (ch == '\r' || ch == '\n') {
91 writeEncodedCharacter(' ');
92 }
93 else {
94 // we can write this out without encoding.
95 writeCharacter(' ');
96 }
97 // we always turn this off.
98 lastWhitespace = false;
99 }
100 // deferred carriage return?
101 else if (lastCR) {
102 // if the char following the CR was not a new line, write an EOL now.
103 if (ch != '\n') {
104 writeEOL();
105 }
106 // we always turn this off too
107 lastCR = false;
108 }
109 }
110
111
112 /**
113 * encode the input data producing a UUEncoded output stream.
114 *
115 * @param data The array of byte data.
116 * @param off The starting offset within the data.
117 * @param length Length of the data to encode.
118 *
119 * @return the number of bytes produced.
120 */
121 public int encode(byte[] data, int off, int length) throws IOException {
122 int endOffset = off + length;
123
124 while (off < endOffset) {
125 // get the character
126 byte ch = data[off++];
127
128 // handle the encoding of this character.
129 encode(ch);
130 }
131
132 return bytesWritten;
133 }
134
135
136 public void encode(int ch) throws IOException {
137 // make sure this is just a single byte value.
138 ch = ch &0xFF;
139
140 // see if we had to defer handling of a whitespace or '\r' character, and handle it if necessary.
141 checkDeferred(ch);
142 // different characters require special handling.
143 switch (ch) {
144 // spaces require special handling. If the next character is a line terminator, then
145 // the space needs to be encoded.
146 case ' ':
147 {
148 // at this point, we don't know whether this needs encoding or not. If the next
149 // character is a linend, it gets encoded. If anything else, we just write it as is.
150 lastWhitespace = true;
151 // turn off any CR flags.
152 lastCR = false;
153 break;
154 }
155
156 // carriage return, which may be part of a CRLF sequence.
157 case '\r':
158 {
159 // just flag this until we see the next character.
160 lastCR = true;
161 break;
162 }
163
164 // a new line character...we need to check to see if it was paired up with a '\r' char.
165 case '\n':
166 {
167 // we always write this out for a newline. We defer CRs until we see if the LF follows.
168 writeEOL();
169 break;
170 }
171
172 // an '=' is the escape character for an encoded character, so it must also
173 // be written encoded.
174 case '=':
175 {
176 writeEncodedCharacter(ch);
177 break;
178 }
179
180 // all other characters. If outside the printable character range, write it encoded.
181 default:
182 {
183 if (ch < 32 || ch >= 127) {
184 writeEncodedCharacter(ch);
185 }
186 else {
187 writeCharacter(ch);
188 }
189 break;
190 }
191 }
192 }
193
194
195 /**
196 * encode the input data producing a UUEncoded output stream.
197 *
198 * @param data The array of byte data.
199 * @param off The starting offset within the data.
200 * @param length Length of the data to encode.
201 *
202 * @return the number of bytes produced.
203 */
204 public int encode(byte[] data, int off, int length, String specials) throws IOException {
205 int endOffset = off + length;
206
207 while (off < endOffset) {
208 // get the character
209 byte ch = data[off++];
210
211 // handle the encoding of this character.
212 encode(ch, specials);
213 }
214
215 return bytesWritten;
216 }
217
218
219 /**
220 * encode the input data producing a UUEncoded output stream.
221 *
222 * @param data The array of byte data.
223 * @param off The starting offset within the data.
224 * @param length Length of the data to encode.
225 *
226 * @return the number of bytes produced.
227 */
228 public int encode(PushbackInputStream in, StringBuffer out, String specials, int limit) throws IOException {
229 int count = 0;
230
231 while (count < limit) {
232 int ch = in.read();
233
234 if (ch == -1) {
235 return count;
236 }
237 // make sure this is just a single byte value.
238 ch = ch &0xFF;
239
240 // spaces require special handling. If the next character is a line terminator, then
241 // the space needs to be encoded.
242 if (ch == ' ') {
243 // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
244 out.append('_');
245 count++;
246 }
247 // non-ascii chars and the designated specials all get encoded.
248 else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
249 // we need at least 3 characters to write this out, so we need to
250 // forget we saw this one and try in the next segment.
251 if (count + 3 > limit) {
252 in.unread(ch);
253 return count;
254 }
255 out.append('=');
256 out.append((char)encodingTable[ch >> 4]);
257 out.append((char)encodingTable[ch & 0x0F]);
258 count += 3;
259 }
260 else {
261 // good character, just use unchanged.
262 out.append((char)ch);
263 count++;
264 }
265 }
266 return count;
267 }
268
269
270 /**
271 * Specialized version of the decoder that handles encoding of
272 * RFC 2047 encoded word values. This has special handling for
273 * certain characters, but less special handling for blanks and
274 * linebreaks.
275 *
276 * @param ch
277 * @param specials
278 *
279 * @exception IOException
280 */
281 public void encode(int ch, String specials) throws IOException {
282 // make sure this is just a single byte value.
283 ch = ch &0xFF;
284
285 // spaces require special handling. If the next character is a line terminator, then
286 // the space needs to be encoded.
287 if (ch == ' ') {
288 // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
289 writeCharacter('_');
290 }
291 // non-ascii chars and the designated specials all get encoded.
292 else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
293 writeEncodedCharacter(ch);
294 }
295 else {
296 // good character, just use unchanged.
297 writeCharacter(ch);
298 }
299 }
300
301
302 /**
303 * encode the input data producing a UUEncoded output stream.
304 *
305 * @param data The array of byte data.
306 * @param off The starting offset within the data.
307 * @param length Length of the data to encode.
308 * @param out The output stream the encoded data is written to.
309 *
310 * @return the number of bytes produced.
311 */
312 public int encode(byte[] data, int off, int length, OutputStream out) throws IOException {
313 // make sure we're writing to the correct stream
314 this.out = out;
315 bytesWritten = 0;
316
317 // do the actual encoding
318 return encode(data, off, length);
319 }
320
321
322 /**
323 * decode the uuencoded byte data writing it to the given output stream
324 *
325 * @param data The array of byte data to decode.
326 * @param off Starting offset within the array.
327 * @param length The length of data to encode.
328 * @param out The output stream used to return the decoded data.
329 *
330 * @return the number of bytes produced.
331 * @exception IOException
332 */
333 public int decode(byte[] data, int off, int length, OutputStream out) throws IOException {
334 // make sure we're writing to the correct stream
335 this.out = out;
336
337 int endOffset = off + length;
338 int bytesWritten = 0;
339
340 while (off < endOffset) {
341 byte ch = data[off++];
342
343 // space characters are a pain. We need to scan ahead until we find a non-space character.
344 // if the character is a line terminator, we need to discard the blanks.
345 if (ch == ' ') {
346 int trailingSpaces = 1;
347 // scan forward, counting the characters.
348 while (off < endOffset && data[off] == ' ') {
349 // step forward and count this.
350 off++;
351 trailingSpaces++;
352 }
353 // is this a lineend at the current location?
354 if (off >= endOffset || data[off] == '\r' || data[off] == '\n') {
355 // go to the next one
356 continue;
357 }
358 else {
359 // make sure we account for the spaces in the output count.
360 bytesWritten += trailingSpaces;
361 // write out the blank characters we counted and continue with the non-blank.
362 while (trailingSpaces-- > 0) {
363 out.write(' ');
364 }
365 }
366 }
367 else if (ch == '=') {
368 // we found an encoded character. Reduce the 3 char sequence to one.
369 // but first, make sure we have two characters to work with.
370 if (off + 1 >= endOffset) {
371 throw new IOException("Invalid quoted printable encoding");
372 }
373 // convert the two bytes back from hex.
374 byte b1 = data[off++];
375 byte b2 = data[off++];
376
377 // we've found an encoded carriage return. The next char needs to be a newline
378 if (b1 == '\r') {
379 if (b2 != '\n') {
380 throw new IOException("Invalid quoted printable encoding");
381 }
382 // this was a soft linebreak inserted by the encoding. We just toss this away
383 // on decode.
384 }
385 else {
386 // this is a hex pair we need to convert back to a single byte.
387 b1 = decodingTable[b1];
388 b2 = decodingTable[b2];
389 out.write((b1 << 4) | b2);
390 // 3 bytes in, one byte out
391 bytesWritten++;
392 }
393 }
394 else {
395 // simple character, just write it out.
396 out.write(ch);
397 bytesWritten++;
398 }
399 }
400
401 return bytesWritten;
402 }
403
404 /**
405 * Decode a byte array of data.
406 *
407 * @param data The data array.
408 * @param out The output stream target for the decoded data.
409 *
410 * @return The number of bytes written to the stream.
411 * @exception IOException
412 */
413 public int decodeWord(byte[] data, OutputStream out) throws IOException {
414 return decodeWord(data, 0, data.length, out);
415 }
416
417
418 /**
419 * decode the uuencoded byte data writing it to the given output stream
420 *
421 * @param data The array of byte data to decode.
422 * @param off Starting offset within the array.
423 * @param length The length of data to encode.
424 * @param out The output stream used to return the decoded data.
425 *
426 * @return the number of bytes produced.
427 * @exception IOException
428 */
429 public int decodeWord(byte[] data, int off, int length, OutputStream out) throws IOException {
430 // make sure we're writing to the correct stream
431 this.out = out;
432
433 int endOffset = off + length;
434 int bytesWritten = 0;
435
436 while (off < endOffset) {
437 byte ch = data[off++];
438
439 // space characters were translated to '_' on encode, so we need to translate them back.
440 if (ch == '_') {
441 out.write(' ');
442 }
443 else if (ch == '=') {
444 // we found an encoded character. Reduce the 3 char sequence to one.
445 // but first, make sure we have two characters to work with.
446 if (off + 1 >= endOffset) {
447 throw new IOException("Invalid quoted printable encoding");
448 }
449 // convert the two bytes back from hex.
450 byte b1 = data[off++];
451 byte b2 = data[off++];
452
453 // we've found an encoded carriage return. The next char needs to be a newline
454 if (b1 == '\r') {
455 if (b2 != '\n') {
456 throw new IOException("Invalid quoted printable encoding");
457 }
458 // this was a soft linebreak inserted by the encoding. We just toss this away
459 // on decode.
460 }
461 else {
462 // this is a hex pair we need to convert back to a single byte.
463 byte c1 = decodingTable[b1];
464 byte c2 = decodingTable[b2];
465 out.write((c1 << 4) | c2);
466 // 3 bytes in, one byte out
467 bytesWritten++;
468 }
469 }
470 else {
471 // simple character, just write it out.
472 out.write(ch);
473 bytesWritten++;
474 }
475 }
476
477 return bytesWritten;
478 }
479
480
481 /**
482 * decode the UUEncoded String data writing it to the given output stream.
483 *
484 * @param data The String data to decode.
485 * @param out The output stream to write the decoded data to.
486 *
487 * @return the number of bytes produced.
488 * @exception IOException
489 */
490 public int decode(String data, OutputStream out) throws IOException {
491 try {
492 // just get the byte data and decode.
493 byte[] bytes = data.getBytes("US-ASCII");
494 return decode(bytes, 0, bytes.length, out);
495 } catch (UnsupportedEncodingException e) {
496 throw new IOException("Invalid UUEncoding");
497 }
498 }
499
500 private void checkLineLength(int required) throws IOException {
501 // if we're at our line length limit, write out a soft line break and reset.
502 if ((lineCount + required) >= lineLength ) {
503 out.write('=');
504 out.write('\r');
505 out.write('\n');
506 bytesWritten += 3;
507 lineCount = 0;
508 }
509 }
510
511
512 public void writeEncodedCharacter(int ch) throws IOException {
513 // we need 3 characters for an encoded value
514 checkLineLength(3);
515 out.write('=');
516 out.write(encodingTable[ch >> 4]);
517 out.write(encodingTable[ch & 0x0F]);
518 lineCount += 3;
519 bytesWritten += 3;
520 }
521
522
523 public void writeCharacter(int ch) throws IOException {
524 // we need 3 characters for an encoded value
525 checkLineLength(1);
526 out.write(ch);
527 lineCount++;
528 bytesWritten++;
529 }
530
531
532 public void writeEOL() throws IOException {
533 out.write('\r');
534 out.write('\n');
535 lineCount = 0;
536 bytesWritten += 3;
537 }
538
539
540 public int decode(InputStream in) throws IOException {
541
542 // we potentially need to scan over spans of whitespace characters to determine if they're real
543 // we just return blanks until the count goes to zero.
544 if (deferredWhitespace > 0) {
545 deferredWhitespace--;
546 return ' ';
547 }
548
549 // we may have needed to scan ahead to find the first non-blank character, which we would store here.
550 // hand that back once we're done with the blanks.
551 if (cachedCharacter != -1) {
552 int result = cachedCharacter;
553 cachedCharacter = -1;
554 return result;
555 }
556
557 int ch = in.read();
558
559 // reflect back an EOF condition.
560 if (ch == -1) {
561 return -1;
562 }
563
564 // space characters are a pain. We need to scan ahead until we find a non-space character.
565 // if the character is a line terminator, we need to discard the blanks.
566 if (ch == ' ') {
567 // scan forward, counting the characters.
568 while ((ch = in.read()) == ' ') {
569 deferredWhitespace++;
570 }
571
572 // is this a lineend at the current location?
573 if (ch == -1 || ch == '\r' || ch == '\n') {
574 // those blanks we so zealously counted up don't really exist. Clear out the counter.
575 deferredWhitespace = 0;
576 // return the real significant character now.
577 return ch;
578 }
579 // remember this character for later, after we've used up the deferred blanks.
580 cachedCharacter = decodeNonspaceChar(in, ch);
581 // return this space. We did not include this one in the deferred count, so we're right in sync.
582 return ' ';
583 }
584 return decodeNonspaceChar(in, ch);
585 }
586
587 private int decodeNonspaceChar(InputStream in, int ch) throws IOException {
588 if (ch == '=') {
589 int b1 = in.read();
590 // we need to get two characters after the quotation marker
591 if (b1 == -1) {
592 throw new IOException("Truncated quoted printable data");
593 }
594 int b2 = in.read();
595 // we need to get two characters after the quotation marker
596 if (b2 == -1) {
597 throw new IOException("Truncated quoted printable data");
598 }
599
600 // we've found an encoded carriage return. The next char needs to be a newline
601 if (b1 == '\r') {
602 if (b2 != '\n') {
603 throw new IOException("Invalid quoted printable encoding");
604 }
605 // this was a soft linebreak inserted by the encoding. We just toss this away
606 // on decode. We need to return something, so recurse and decode the next.
607 return decode(in);
608 }
609 else {
610 // this is a hex pair we need to convert back to a single byte.
611 b1 = decodingTable[b1];
612 b2 = decodingTable[b2];
613 return (b1 << 4) | b2;
614 }
615 }
616 else {
617 return ch;
618 }
619 }
620
621
622 /**
623 * Perform RFC-2047 word encoding using Q-P data encoding.
624 *
625 * @param in The source for the encoded data.
626 * @param charset The charset tag to be added to each encoded data section.
627 * @param specials The set of special characters that we require to encoded.
628 * @param out The output stream where the encoded data is to be written.
629 * @param fold Controls whether separate sections of encoded data are separated by
630 * linebreaks or whitespace.
631 *
632 * @exception IOException
633 */
634 public void encodeWord(InputStream in, String charset, String specials, OutputStream out, boolean fold) throws IOException
635 {
636 // we need to scan ahead in a few places, which may require pushing characters back on to the stream.
637 // make sure we have a stream where this is possible.
638 PushbackInputStream inStream = new PushbackInputStream(in);
639 PrintStream writer = new PrintStream(out);
640
641 // segments of encoded data are limited to 75 byes, including the control sections.
642 int limit = 75 - 7 - charset.length();
643 boolean firstLine = true;
644 StringBuffer encodedString = new StringBuffer(76);
645
646 while (true) {
647
648 // encode another segment of data.
649 encode(inStream, encodedString, specials, limit);
650 // nothing encoded means we've hit the end of the data.
651 if (encodedString.length() == 0) {
652 break;
653 }
654 // if we have more than one segment, we need to insert separators. Depending on whether folding
655 // was requested, this is either a blank or a linebreak.
656 if (!firstLine) {
657 if (fold) {
658 writer.print("\r\n");
659 }
660 else {
661 writer.print(" ");
662 }
663 }
664
665 // add the encoded word header
666 writer.print("=?");
667 writer.print(charset);
668 writer.print("?Q?");
669 // the data
670 writer.print(encodedString.toString());
671 // and the terminator mark
672 writer.print("?=");
673 writer.flush();
674
675 // we reset the string buffer and reuse it.
676 encodedString.setLength(0);
677 // we need a delimiter between sections from this point on.
678 firstLine = false;
679 }
680 }
681
682
683 /**
684 * Perform RFC-2047 word encoding using Base64 data encoding.
685 *
686 * @param in The source for the encoded data.
687 * @param charset The charset tag to be added to each encoded data section.
688 * @param out The output stream where the encoded data is to be written.
689 * @param fold Controls whether separate sections of encoded data are separated by
690 * linebreaks or whitespace.
691 *
692 * @exception IOException
693 */
694 public void encodeWord(byte[] data, StringBuffer out, String charset, String specials) throws IOException
695 {
696 // append the word header
697 out.append("=?");
698 out.append(charset);
699 out.append("?Q?");
700 // add on the encodeded data
701 encodeWordData(data, out, specials);
702 // the end of the encoding marker
703 out.append("?=");
704 }
705
706
707 /**
708 * Perform RFC-2047 word encoding using Q-P data encoding.
709 *
710 * @param in The source for the encoded data.
711 * @param charset The charset tag to be added to each encoded data section.
712 * @param specials The set of special characters that we require to encoded.
713 * @param out The output stream where the encoded data is to be written.
714 * @param fold Controls whether separate sections of encoded data are separated by
715 * linebreaks or whitespace.
716 *
717 * @exception IOException
718 */
719 public void encodeWordData(byte[] data, StringBuffer out, String specials) throws IOException {
720 for (int i = 0; i < data.length; i++) {
721 int ch = data[i] & 0xff; ;
722
723 // spaces require special handling. If the next character is a line terminator, then
724 // the space needs to be encoded.
725 if (ch == ' ') {
726 // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
727 out.append('_');
728 }
729 // non-ascii chars and the designated specials all get encoded.
730 else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
731 out.append('=');
732 out.append((char)encodingTable[ch >> 4]);
733 out.append((char)encodingTable[ch & 0x0F]);
734 }
735 else {
736 // good character, just use unchanged.
737 out.append((char)ch);
738 }
739 }
740 }
741
742
743 /**
744 * Estimate the final encoded size of a segment of data.
745 * This is used to ensure that the encoded blocks do
746 * not get split across a unicode character boundary and
747 * that the encoding will fit within the bounds of
748 * a mail header line.
749 *
750 * @param data The data we're anticipating encoding.
751 *
752 * @return The size of the byte data in encoded form.
753 */
754 public int estimateEncodedLength(byte[] data, String specials)
755 {
756 int count = 0;
757
758 for (int i = 0; i < data.length; i++) {
759 // make sure this is just a single byte value.
760 int ch = data[i] & 0xff;
761
762 // non-ascii chars and the designated specials all get encoded.
763 if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
764 // Q encoding translates a single char into 3 characters
765 count += 3;
766 }
767 else {
768 // non-encoded character
769 count++;
770 }
771 }
772 return count;
773 }
774 }
775
776
777