001 /**
002 *
003 * Copyright 2003-2004 The Apache Software Foundation
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.geronimo.mail.util;
019
020 import java.io.EOFException;
021 import java.io.IOException;
022 import java.io.InputStream;
023 import java.io.OutputStream;
024 import java.io.PrintStream;
025 import java.io.PushbackInputStream;
026 import java.io.UnsupportedEncodingException;
027
028 public class QuotedPrintableEncoder implements Encoder {
029
030 static protected final byte[] encodingTable =
031 {
032 (byte)'0', (byte)'1', (byte)'2', (byte)'3', (byte)'4', (byte)'5', (byte)'6', (byte)'7',
033 (byte)'8', (byte)'9', (byte)'A', (byte)'B', (byte)'C', (byte)'D', (byte)'E', (byte)'F'
034 };
035
036 /*
037 * set up the decoding table.
038 */
039 static protected final byte[] decodingTable = new byte[128];
040
041 static {
042 // initialize the decoding table
043 for (int i = 0; i < encodingTable.length; i++)
044 {
045 decodingTable[encodingTable[i]] = (byte)i;
046 }
047 }
048
049
050 // default number of characters we will write per line.
051 static private final int DEFAULT_CHARS_PER_LINE = 76;
052
053 // the output stream we're wrapped around
054 protected OutputStream out;
055 // the number of bytes written;
056 protected int bytesWritten = 0;
057 // number of bytes written on the current line
058 protected int lineCount = 0;
059 // line length we're dealing with
060 protected int lineLength;
061 // number of deferred whitespace characters in decode mode.
062 protected int deferredWhitespace = 0;
063
064 protected int cachedCharacter = -1;
065
066 // indicates whether the last character was a '\r', potentially part of a CRLF sequence.
067 protected boolean lastCR = false;
068 // remember whether last character was a white space.
069 protected boolean lastWhitespace = false;
070
071 public QuotedPrintableEncoder() {
072 this(null, DEFAULT_CHARS_PER_LINE);
073 }
074
075 public QuotedPrintableEncoder(OutputStream out) {
076 this(out, DEFAULT_CHARS_PER_LINE);
077 }
078
079 public QuotedPrintableEncoder(OutputStream out, int lineLength) {
080 this.out = out;
081 this.lineLength = lineLength;
082 }
083
084 private void checkDeferred(int ch) throws IOException {
085 // was the last character we looked at a whitespace? Try to decide what to do with it now.
086 if (lastWhitespace) {
087 // if this whitespace is at the end of the line, write it out encoded
088 if (ch == '\r' || ch == '\n') {
089 writeEncodedCharacter(' ');
090 }
091 else {
092 // we can write this out without encoding.
093 writeCharacter(' ');
094 }
095 // we always turn this off.
096 lastWhitespace = false;
097 }
098 // deferred carriage return?
099 else if (lastCR) {
100 // if the char following the CR was not a new line, write an EOL now.
101 if (ch != '\n') {
102 writeEOL();
103 }
104 // we always turn this off too
105 lastCR = false;
106 }
107 }
108
109
110 /**
111 * encode the input data producing a UUEncoded output stream.
112 *
113 * @param data The array of byte data.
114 * @param off The starting offset within the data.
115 * @param length Length of the data to encode.
116 *
117 * @return the number of bytes produced.
118 */
119 public int encode(byte[] data, int off, int length) throws IOException {
120 int endOffset = off + length;
121
122 while (off < endOffset) {
123 // get the character
124 byte ch = data[off++];
125
126 // handle the encoding of this character.
127 encode(ch);
128 }
129
130 return bytesWritten;
131 }
132
133
134 public void encode(int ch) throws IOException {
135 // make sure this is just a single byte value.
136 ch = ch &0xFF;
137
138 // see if we had to defer handling of a whitespace or '\r' character, and handle it if necessary.
139 checkDeferred(ch);
140 // different characters require special handling.
141 switch (ch) {
142 // spaces require special handling. If the next character is a line terminator, then
143 // the space needs to be encoded.
144 case ' ':
145 {
146 // at this point, we don't know whether this needs encoding or not. If the next
147 // character is a linend, it gets encoded. If anything else, we just write it as is.
148 lastWhitespace = true;
149 // turn off any CR flags.
150 lastCR = false;
151 break;
152 }
153
154 // carriage return, which may be part of a CRLF sequence.
155 case '\r':
156 {
157 // just flag this until we see the next character.
158 lastCR = true;
159 break;
160 }
161
162 // a new line character...we need to check to see if it was paired up with a '\r' char.
163 case '\n':
164 {
165 // we always write this out for a newline. We defer CRs until we see if the LF follows.
166 writeEOL();
167 break;
168 }
169
170 // an '=' is the escape character for an encoded character, so it must also
171 // be written encoded.
172 case '=':
173 {
174 writeEncodedCharacter(ch);
175 break;
176 }
177
178 // all other characters. If outside the printable character range, write it encoded.
179 default:
180 {
181 if (ch < 32 || ch >= 127) {
182 writeEncodedCharacter(ch);
183 }
184 else {
185 writeCharacter(ch);
186 }
187 break;
188 }
189 }
190 }
191
192
193 /**
194 * encode the input data producing a UUEncoded output stream.
195 *
196 * @param data The array of byte data.
197 * @param off The starting offset within the data.
198 * @param length Length of the data to encode.
199 *
200 * @return the number of bytes produced.
201 */
202 public int encode(byte[] data, int off, int length, String specials) throws IOException {
203 int endOffset = off + length;
204
205 while (off < endOffset) {
206 // get the character
207 byte ch = data[off++];
208
209 // handle the encoding of this character.
210 encode(ch, specials);
211 }
212
213 return bytesWritten;
214 }
215
216
217 /**
218 * encode the input data producing a UUEncoded output stream.
219 *
220 * @param data The array of byte data.
221 * @param off The starting offset within the data.
222 * @param length Length of the data to encode.
223 *
224 * @return the number of bytes produced.
225 */
226 public int encode(PushbackInputStream in, StringBuffer out, String specials, int limit) throws IOException {
227 int count = 0;
228
229 while (count < limit) {
230 int ch = in.read();
231
232 if (ch == -1) {
233 return count;
234 }
235 // make sure this is just a single byte value.
236 ch = ch &0xFF;
237
238 // spaces require special handling. If the next character is a line terminator, then
239 // the space needs to be encoded.
240 if (ch == ' ') {
241 // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
242 out.append('_');
243 count++;
244 }
245 // non-ascii chars and the designated specials all get encoded.
246 else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
247 // we need at least 3 characters to write this out, so we need to
248 // forget we saw this one and try in the next segment.
249 if (count + 3 > limit) {
250 in.unread(ch);
251 return count;
252 }
253 out.append('=');
254 out.append((char)encodingTable[ch >> 4]);
255 out.append((char)encodingTable[ch & 0x0F]);
256 count += 3;
257 }
258 else {
259 // good character, just use unchanged.
260 out.append((char)ch);
261 count++;
262 }
263 }
264 return count;
265 }
266
267
268 /**
269 * Specialized version of the decoder that handles encoding of
270 * RFC 2047 encoded word values. This has special handling for
271 * certain characters, but less special handling for blanks and
272 * linebreaks.
273 *
274 * @param ch
275 * @param specials
276 *
277 * @exception IOException
278 */
279 public void encode(int ch, String specials) throws IOException {
280 // make sure this is just a single byte value.
281 ch = ch &0xFF;
282
283 // spaces require special handling. If the next character is a line terminator, then
284 // the space needs to be encoded.
285 if (ch == ' ') {
286 // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
287 writeCharacter('_');
288 }
289 // non-ascii chars and the designated specials all get encoded.
290 else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
291 writeEncodedCharacter(ch);
292 }
293 else {
294 // good character, just use unchanged.
295 writeCharacter(ch);
296 }
297 }
298
299
300 /**
301 * encode the input data producing a UUEncoded output stream.
302 *
303 * @param data The array of byte data.
304 * @param off The starting offset within the data.
305 * @param length Length of the data to encode.
306 * @param out The output stream the encoded data is written to.
307 *
308 * @return the number of bytes produced.
309 */
310 public int encode(byte[] data, int off, int length, OutputStream out) throws IOException {
311 // make sure we're writing to the correct stream
312 this.out = out;
313 bytesWritten = 0;
314
315 // do the actual encoding
316 return encode(data, off, length);
317 }
318
319
320 /**
321 * decode the uuencoded byte data writing it to the given output stream
322 *
323 * @param data The array of byte data to decode.
324 * @param off Starting offset within the array.
325 * @param length The length of data to encode.
326 * @param out The output stream used to return the decoded data.
327 *
328 * @return the number of bytes produced.
329 * @exception IOException
330 */
331 public int decode(byte[] data, int off, int length, OutputStream out) throws IOException {
332 // make sure we're writing to the correct stream
333 this.out = out;
334
335 int endOffset = off + length;
336 int bytesWritten = 0;
337
338 while (off < endOffset) {
339 byte ch = data[off++];
340
341 // space characters are a pain. We need to scan ahead until we find a non-space character.
342 // if the character is a line terminator, we need to discard the blanks.
343 if (ch == ' ') {
344 int trailingSpaces = 1;
345 // scan forward, counting the characters.
346 while (off < endOffset && data[off] == ' ') {
347 // step forward and count this.
348 off++;
349 trailingSpaces++;
350 }
351 // is this a lineend at the current location?
352 if (off >= endOffset || data[off] == '\r' || data[off] == '\n') {
353 // go to the next one
354 continue;
355 }
356 else {
357 // make sure we account for the spaces in the output count.
358 bytesWritten += trailingSpaces;
359 // write out the blank characters we counted and continue with the non-blank.
360 while (trailingSpaces-- > 0) {
361 out.write(' ');
362 }
363 }
364 }
365 else if (ch == '=') {
366 // we found an encoded character. Reduce the 3 char sequence to one.
367 // but first, make sure we have two characters to work with.
368 if (off + 1 >= endOffset) {
369 throw new IOException("Invalid quoted printable encoding");
370 }
371 // convert the two bytes back from hex.
372 byte b1 = data[off++];
373 byte b2 = data[off++];
374
375 // we've found an encoded carriage return. The next char needs to be a newline
376 if (b1 == '\r') {
377 if (b2 != '\n') {
378 throw new IOException("Invalid quoted printable encoding");
379 }
380 // this was a soft linebreak inserted by the encoding. We just toss this away
381 // on decode.
382 }
383 else {
384 // this is a hex pair we need to convert back to a single byte.
385 b1 = decodingTable[b1];
386 b2 = decodingTable[b2];
387 out.write((b1 << 4) | b2);
388 // 3 bytes in, one byte out
389 bytesWritten++;
390 }
391 }
392 else {
393 // simple character, just write it out.
394 out.write(ch);
395 bytesWritten++;
396 }
397 }
398
399 return bytesWritten;
400 }
401
402 /**
403 * Decode a byte array of data.
404 *
405 * @param data The data array.
406 * @param out The output stream target for the decoded data.
407 *
408 * @return The number of bytes written to the stream.
409 * @exception IOException
410 */
411 public int decodeWord(byte[] data, OutputStream out) throws IOException {
412 return decodeWord(data, 0, data.length, out);
413 }
414
415
416 /**
417 * decode the uuencoded byte data writing it to the given output stream
418 *
419 * @param data The array of byte data to decode.
420 * @param off Starting offset within the array.
421 * @param length The length of data to encode.
422 * @param out The output stream used to return the decoded data.
423 *
424 * @return the number of bytes produced.
425 * @exception IOException
426 */
427 public int decodeWord(byte[] data, int off, int length, OutputStream out) throws IOException {
428 // make sure we're writing to the correct stream
429 this.out = out;
430
431 int endOffset = off + length;
432 int bytesWritten = 0;
433
434 while (off < endOffset) {
435 byte ch = data[off++];
436
437 // space characters were translated to '_' on encode, so we need to translate them back.
438 if (ch == '_') {
439 out.write(' ');
440 }
441 else if (ch == '=') {
442 // we found an encoded character. Reduce the 3 char sequence to one.
443 // but first, make sure we have two characters to work with.
444 if (off + 1 >= endOffset) {
445 throw new IOException("Invalid quoted printable encoding");
446 }
447 // convert the two bytes back from hex.
448 byte b1 = data[off++];
449 byte b2 = data[off++];
450
451 // we've found an encoded carriage return. The next char needs to be a newline
452 if (b1 == '\r') {
453 if (b2 != '\n') {
454 throw new IOException("Invalid quoted printable encoding");
455 }
456 // this was a soft linebreak inserted by the encoding. We just toss this away
457 // on decode.
458 }
459 else {
460 // this is a hex pair we need to convert back to a single byte.
461 byte c1 = decodingTable[b1];
462 byte c2 = decodingTable[b2];
463 out.write((c1 << 4) | c2);
464 // 3 bytes in, one byte out
465 bytesWritten++;
466 }
467 }
468 else {
469 // simple character, just write it out.
470 out.write(ch);
471 bytesWritten++;
472 }
473 }
474
475 return bytesWritten;
476 }
477
478
479 /**
480 * decode the UUEncoded String data writing it to the given output stream.
481 *
482 * @param data The String data to decode.
483 * @param out The output stream to write the decoded data to.
484 *
485 * @return the number of bytes produced.
486 * @exception IOException
487 */
488 public int decode(String data, OutputStream out) throws IOException {
489 try {
490 // just get the byte data and decode.
491 byte[] bytes = data.getBytes("US-ASCII");
492 return decode(bytes, 0, bytes.length, out);
493 } catch (UnsupportedEncodingException e) {
494 throw new IOException("Invalid UUEncoding");
495 }
496 }
497
498 private void checkLineLength(int required) throws IOException {
499 // if we're at our line length limit, write out a soft line break and reset.
500 if ((lineCount + required) > lineLength ) {
501 out.write('=');
502 out.write('\r');
503 out.write('\n');
504 bytesWritten += 3;
505 lineCount = 0;
506 }
507 }
508
509
510 public void writeEncodedCharacter(int ch) throws IOException {
511 // we need 3 characters for an encoded value
512 checkLineLength(3);
513 out.write('=');
514 out.write(encodingTable[ch >> 4]);
515 out.write(encodingTable[ch & 0x0F]);
516 lineCount += 3;
517 bytesWritten += 3;
518 }
519
520
521 public void writeCharacter(int ch) throws IOException {
522 // we need 3 characters for an encoded value
523 checkLineLength(1);
524 out.write(ch);
525 lineCount++;
526 bytesWritten++;
527 }
528
529
530 public void writeEOL() throws IOException {
531 out.write('\r');
532 out.write('\n');
533 lineCount = 0;
534 bytesWritten += 3;
535 }
536
537
538 public int decode(InputStream in) throws IOException {
539
540 // we potentially need to scan over spans of whitespace characters to determine if they're real
541 // we just return blanks until the count goes to zero.
542 if (deferredWhitespace > 0) {
543 deferredWhitespace--;
544 return ' ';
545 }
546
547 // we may have needed to scan ahead to find the first non-blank character, which we would store here.
548 // hand that back once we're done with the blanks.
549 if (cachedCharacter != -1) {
550 int result = cachedCharacter;
551 cachedCharacter = -1;
552 return result;
553 }
554
555 int ch = in.read();
556
557 // reflect back an EOF condition.
558 if (ch == -1) {
559 return -1;
560 }
561
562 // space characters are a pain. We need to scan ahead until we find a non-space character.
563 // if the character is a line terminator, we need to discard the blanks.
564 if (ch == ' ') {
565 // scan forward, counting the characters.
566 while ((ch = in.read()) == ' ') {
567 deferredWhitespace++;
568 }
569
570 // is this a lineend at the current location?
571 if (ch == -1 || ch == '\r' || ch == '\n') {
572 // those blanks we so zealously counted up don't really exist. Clear out the counter.
573 deferredWhitespace = 0;
574 // return the real significant character now.
575 return ch;
576 }
577 else {
578 // remember this character for later, after we've used up the deferred blanks.
579 cachedCharacter = ch;
580 // return this space. We did not include this one in the deferred count, so we're right in sync.
581 return ' ';
582 }
583 }
584 else if (ch == '=') {
585 int b1 = in.read();
586 // we need to get two characters after the quotation marker
587 if (b1 == -1) {
588 throw new IOException("Truncated quoted printable data");
589 }
590 int b2 = in.read();
591 // we need to get two characters after the quotation marker
592 if (b2 == -1) {
593 throw new IOException("Truncated quoted printable data");
594 }
595
596 // we've found an encoded carriage return. The next char needs to be a newline
597 if (b1 == '\r') {
598 if (b2 != '\n') {
599 throw new IOException("Invalid quoted printable encoding");
600 }
601 // this was a soft linebreak inserted by the encoding. We just toss this away
602 // on decode. We need to return something, so recurse and decode the next.
603 return decode(in);
604 }
605 else {
606 // this is a hex pair we need to convert back to a single byte.
607 b1 = decodingTable[b1];
608 b2 = decodingTable[b2];
609 return (b1 << 4) | b2;
610 }
611 }
612 else {
613 return ch;
614 }
615 }
616
617
618 /**
619 * Perform RFC-2047 word encoding using Q-P data encoding.
620 *
621 * @param in The source for the encoded data.
622 * @param charset The charset tag to be added to each encoded data section.
623 * @param specials The set of special characters that we require to encoded.
624 * @param out The output stream where the encoded data is to be written.
625 * @param fold Controls whether separate sections of encoded data are separated by
626 * linebreaks or whitespace.
627 *
628 * @exception IOException
629 */
630 public void encodeWord(InputStream in, String charset, String specials, OutputStream out, boolean fold) throws IOException
631 {
632 // we need to scan ahead in a few places, which may require pushing characters back on to the stream.
633 // make sure we have a stream where this is possible.
634 PushbackInputStream inStream = new PushbackInputStream(in);
635 PrintStream writer = new PrintStream(out);
636
637 // segments of encoded data are limited to 76 byes, including the control sections.
638 int limit = 76 - 7 - charset.length();
639 boolean firstLine = true;
640 StringBuffer encodedString = new StringBuffer(76);
641
642 while (true) {
643
644 // encode another segment of data.
645 encode(inStream, encodedString, specials, limit);
646 // nothing encoded means we've hit the end of the data.
647 if (encodedString.length() == 0) {
648 break;
649 }
650 // if we have more than one segment, we need to insert separators. Depending on whether folding
651 // was requested, this is either a blank or a linebreak.
652 if (!firstLine) {
653 if (fold) {
654 writer.print("\r\n");
655 }
656 else {
657 writer.print(" ");
658 }
659 }
660
661 // add the encoded word header
662 writer.print("=?");
663 writer.print(charset);
664 writer.print("?Q?");
665 // the data
666 writer.print(encodedString.toString());
667 // and the terminator mark
668 writer.print("?=");
669 writer.flush();
670
671 // we reset the string buffer and reuse it.
672 encodedString.setLength(0);
673 }
674 }
675 }
676
677
678