View Javadoc

1   /**
2    *
3    * Copyright 2003-2006 The Apache Software Foundation
4    *
5    *  Licensed under the Apache License, Version 2.0 (the "License");
6    *  you may not use this file except in compliance with the License.
7    *  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   *  Unless required by applicable law or agreed to in writing, software
12   *  distributed under the License is distributed on an "AS IS" BASIS,
13   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   *  See the License for the specific language governing permissions and
15   *  limitations under the License.
16   */
17  
18  package org.apache.geronimo.mail.util;
19  
20  import java.io.EOFException;
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.io.OutputStream;
24  import java.io.PrintStream;
25  import java.io.PushbackInputStream;
26  import java.io.UnsupportedEncodingException;
27  
28  public class QuotedPrintableEncoder implements Encoder {
29  
30      static protected final byte[] encodingTable =
31      {
32          (byte)'0', (byte)'1', (byte)'2', (byte)'3', (byte)'4', (byte)'5', (byte)'6', (byte)'7',
33          (byte)'8', (byte)'9', (byte)'A', (byte)'B', (byte)'C', (byte)'D', (byte)'E', (byte)'F'
34      };
35  
36      /*
37       * set up the decoding table.
38       */
39      static protected final byte[] decodingTable = new byte[128];
40  
41      static {
42          // initialize the decoding table
43          for (int i = 0; i < encodingTable.length; i++)
44          {
45              decodingTable[encodingTable[i]] = (byte)i;
46          }
47      }
48  
49  
50      // default number of characters we will write per line.
51      static private final int DEFAULT_CHARS_PER_LINE = 76;
52  
53      // the output stream we're wrapped around
54      protected OutputStream out;
55      // the number of bytes written;
56      protected int bytesWritten = 0;
57      // number of bytes written on the current line
58      protected int lineCount = 0;
59      // line length we're dealing with
60      protected int lineLength;
61      // number of deferred whitespace characters in decode mode.
62      protected int deferredWhitespace = 0;
63  
64      protected int cachedCharacter = -1;
65  
66      // indicates whether the last character was a '\r', potentially part of a CRLF sequence.
67      protected boolean lastCR = false;
68      // remember whether last character was a white space.
69      protected boolean lastWhitespace = false;
70  
71      public QuotedPrintableEncoder() {
72          this(null, DEFAULT_CHARS_PER_LINE);
73      }
74  
75      public QuotedPrintableEncoder(OutputStream out) {
76          this(out, DEFAULT_CHARS_PER_LINE);
77      }
78  
79      public QuotedPrintableEncoder(OutputStream out, int lineLength) {
80          this.out = out;
81          this.lineLength = lineLength;
82      }
83  
84      private void checkDeferred(int ch) throws IOException {
85          // was the last character we looked at a whitespace?  Try to decide what to do with it now.
86          if (lastWhitespace) {
87              // if this whitespace is at the end of the line, write it out encoded
88              if (ch == '\r' || ch == '\n') {
89                  writeEncodedCharacter(' ');
90              }
91              else {
92                  // we can write this out without encoding.
93                  writeCharacter(' ');
94              }
95              // we always turn this off.
96              lastWhitespace = false;
97          }
98          // deferred carriage return?
99          else if (lastCR) {
100             // if the char following the CR was not a new line, write an EOL now.
101             if (ch != '\n') {
102                 writeEOL();
103             }
104             // we always turn this off too
105             lastCR = false;
106         }
107     }
108 
109 
110     /**
111      * encode the input data producing a UUEncoded output stream.
112      *
113      * @param data   The array of byte data.
114      * @param off    The starting offset within the data.
115      * @param length Length of the data to encode.
116      *
117      * @return the number of bytes produced.
118      */
119     public int encode(byte[] data, int off, int length) throws IOException {
120         int endOffset = off + length;
121 
122         while (off < endOffset) {
123             // get the character
124             byte ch = data[off++];
125 
126             // handle the encoding of this character.
127             encode(ch);
128         }
129 
130         return bytesWritten;
131     }
132 
133 
134     public void encode(int ch) throws IOException {
135         // make sure this is just a single byte value.
136         ch = ch &0xFF;
137 
138         // see if we had to defer handling of a whitespace or '\r' character, and handle it if necessary.
139         checkDeferred(ch);
140         // different characters require special handling.
141         switch (ch) {
142             // spaces require special handling.  If the next character is a line terminator, then
143             // the space needs to be encoded.
144             case ' ':
145             {
146                 // at this point, we don't know whether this needs encoding or not.  If the next
147                 // character is a linend, it gets encoded.  If anything else, we just write it as is.
148                 lastWhitespace = true;
149                 // turn off any CR flags.
150                 lastCR = false;
151                 break;
152             }
153 
154             // carriage return, which may be part of a CRLF sequence.
155             case '\r':
156             {
157                 // just flag this until we see the next character.
158                 lastCR = true;
159                 break;
160             }
161 
162             // a new line character...we need to check to see if it was paired up with a '\r' char.
163             case '\n':
164             {
165                 // we always write this out for a newline.  We defer CRs until we see if the LF follows.
166                 writeEOL();
167                 break;
168             }
169 
170             // an '=' is the escape character for an encoded character, so it must also
171             // be written encoded.
172             case '=':
173             {
174                 writeEncodedCharacter(ch);
175                 break;
176             }
177 
178             // all other characters.  If outside the printable character range, write it encoded.
179             default:
180             {
181                 if (ch < 32 || ch >= 127) {
182                     writeEncodedCharacter(ch);
183                 }
184                 else {
185                     writeCharacter(ch);
186                 }
187                 break;
188             }
189         }
190     }
191 
192 
193     /**
194      * encode the input data producing a UUEncoded output stream.
195      *
196      * @param data   The array of byte data.
197      * @param off    The starting offset within the data.
198      * @param length Length of the data to encode.
199      *
200      * @return the number of bytes produced.
201      */
202     public int encode(byte[] data, int off, int length, String specials) throws IOException {
203         int endOffset = off + length;
204 
205         while (off < endOffset) {
206             // get the character
207             byte ch = data[off++];
208 
209             // handle the encoding of this character.
210             encode(ch, specials);
211         }
212 
213         return bytesWritten;
214     }
215 
216 
217     /**
218      * encode the input data producing a UUEncoded output stream.
219      *
220      * @param data   The array of byte data.
221      * @param off    The starting offset within the data.
222      * @param length Length of the data to encode.
223      *
224      * @return the number of bytes produced.
225      */
226     public int encode(PushbackInputStream in, StringBuffer out, String specials, int limit) throws IOException {
227         int count = 0;
228 
229         while (count < limit) {
230             int ch = in.read();
231 
232             if (ch == -1) {
233                 return count;
234             }
235             // make sure this is just a single byte value.
236             ch = ch &0xFF;
237 
238             // spaces require special handling.  If the next character is a line terminator, then
239             // the space needs to be encoded.
240             if (ch == ' ') {
241                 // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
242                 out.append('_');
243                 count++;
244             }
245             // non-ascii chars and the designated specials all get encoded.
246             else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
247                 // we need at least 3 characters to write this out, so we need to
248                 // forget we saw this one and try in the next segment.
249                 if (count + 3 > limit) {
250                     in.unread(ch);
251                     return count;
252                 }
253                 out.append('=');
254                 out.append((char)encodingTable[ch >> 4]);
255                 out.append((char)encodingTable[ch & 0x0F]);
256                 count += 3;
257             }
258             else {
259                 // good character, just use unchanged.
260                 out.append((char)ch);
261                 count++;
262             }
263         }
264         return count;
265     }
266 
267 
268     /**
269      * Specialized version of the decoder that handles encoding of
270      * RFC 2047 encoded word values.  This has special handling for
271      * certain characters, but less special handling for blanks and
272      * linebreaks.
273      *
274      * @param ch
275      * @param specials
276      *
277      * @exception IOException
278      */
279     public void encode(int ch, String specials) throws IOException {
280         // make sure this is just a single byte value.
281         ch = ch &0xFF;
282 
283         // spaces require special handling.  If the next character is a line terminator, then
284         // the space needs to be encoded.
285         if (ch == ' ') {
286             // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
287             writeCharacter('_');
288         }
289         // non-ascii chars and the designated specials all get encoded.
290         else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
291             writeEncodedCharacter(ch);
292         }
293         else {
294             // good character, just use unchanged.
295             writeCharacter(ch);
296         }
297     }
298 
299 
300     /**
301      * encode the input data producing a UUEncoded output stream.
302      *
303      * @param data   The array of byte data.
304      * @param off    The starting offset within the data.
305      * @param length Length of the data to encode.
306      * @param out    The output stream the encoded data is written to.
307      *
308      * @return the number of bytes produced.
309      */
310     public int encode(byte[] data, int off, int length, OutputStream out) throws IOException {
311         // make sure we're writing to the correct stream
312         this.out = out;
313         bytesWritten = 0;
314 
315         // do the actual encoding
316         return encode(data, off, length);
317     }
318 
319 
320     /**
321      * decode the uuencoded byte data writing it to the given output stream
322      *
323      * @param data   The array of byte data to decode.
324      * @param off    Starting offset within the array.
325      * @param length The length of data to encode.
326      * @param out    The output stream used to return the decoded data.
327      *
328      * @return the number of bytes produced.
329      * @exception IOException
330      */
331     public int decode(byte[] data, int off, int length, OutputStream out) throws IOException {
332         // make sure we're writing to the correct stream
333         this.out = out;
334 
335         int endOffset = off + length;
336         int bytesWritten = 0;
337 
338         while (off < endOffset) {
339             byte ch = data[off++];
340 
341             // space characters are a pain.  We need to scan ahead until we find a non-space character.
342             // if the character is a line terminator, we need to discard the blanks.
343             if (ch == ' ') {
344                 int trailingSpaces = 1;
345                 // scan forward, counting the characters.
346                 while (off < endOffset && data[off] == ' ') {
347                     // step forward and count this.
348                     off++;
349                     trailingSpaces++;
350                 }
351                 // is this a lineend at the current location?
352                 if (off >= endOffset || data[off] == '\r' || data[off] == '\n') {
353                     // go to the next one
354                     continue;
355                 }
356                 else {
357                     // make sure we account for the spaces in the output count.
358                     bytesWritten += trailingSpaces;
359                     // write out the blank characters we counted and continue with the non-blank.
360                     while (trailingSpaces-- > 0) {
361                         out.write(' ');
362                     }
363                 }
364             }
365             else if (ch == '=') {
366                 // we found an encoded character.  Reduce the 3 char sequence to one.
367                 // but first, make sure we have two characters to work with.
368                 if (off + 1 >= endOffset) {
369                     throw new IOException("Invalid quoted printable encoding");
370                 }
371                 // convert the two bytes back from hex.
372                 byte b1 = data[off++];
373                 byte b2 = data[off++];
374 
375                 // we've found an encoded carriage return.  The next char needs to be a newline
376                 if (b1 == '\r') {
377                     if (b2 != '\n') {
378                         throw new IOException("Invalid quoted printable encoding");
379                     }
380                     // this was a soft linebreak inserted by the encoding.  We just toss this away
381                     // on decode.
382                 }
383                 else {
384                     // this is a hex pair we need to convert back to a single byte.
385                     b1 = decodingTable[b1];
386                     b2 = decodingTable[b2];
387                     out.write((b1 << 4) | b2);
388                     // 3 bytes in, one byte out
389                     bytesWritten++;
390                 }
391             }
392             else {
393                 // simple character, just write it out.
394                 out.write(ch);
395                 bytesWritten++;
396             }
397         }
398 
399         return bytesWritten;
400     }
401 
402     /**
403      * Decode a byte array of data.
404      *
405      * @param data   The data array.
406      * @param out    The output stream target for the decoded data.
407      *
408      * @return The number of bytes written to the stream.
409      * @exception IOException
410      */
411     public int decodeWord(byte[] data, OutputStream out) throws IOException {
412         return decodeWord(data, 0, data.length, out);
413     }
414 
415 
416     /**
417      * decode the uuencoded byte data writing it to the given output stream
418      *
419      * @param data   The array of byte data to decode.
420      * @param off    Starting offset within the array.
421      * @param length The length of data to encode.
422      * @param out    The output stream used to return the decoded data.
423      *
424      * @return the number of bytes produced.
425      * @exception IOException
426      */
427     public int decodeWord(byte[] data, int off, int length, OutputStream out) throws IOException {
428         // make sure we're writing to the correct stream
429         this.out = out;
430 
431         int endOffset = off + length;
432         int bytesWritten = 0;
433 
434         while (off < endOffset) {
435             byte ch = data[off++];
436 
437             // space characters were translated to '_' on encode, so we need to translate them back.
438             if (ch == '_') {
439                 out.write(' ');
440             }
441             else if (ch == '=') {
442                 // we found an encoded character.  Reduce the 3 char sequence to one.
443                 // but first, make sure we have two characters to work with.
444                 if (off + 1 >= endOffset) {
445                     throw new IOException("Invalid quoted printable encoding");
446                 }
447                 // convert the two bytes back from hex.
448                 byte b1 = data[off++];
449                 byte b2 = data[off++];
450 
451                 // we've found an encoded carriage return.  The next char needs to be a newline
452                 if (b1 == '\r') {
453                     if (b2 != '\n') {
454                         throw new IOException("Invalid quoted printable encoding");
455                     }
456                     // this was a soft linebreak inserted by the encoding.  We just toss this away
457                     // on decode.
458                 }
459                 else {
460                     // this is a hex pair we need to convert back to a single byte.
461                     byte c1 = decodingTable[b1];
462                     byte c2 = decodingTable[b2];
463                     out.write((c1 << 4) | c2);
464                     // 3 bytes in, one byte out
465                     bytesWritten++;
466                 }
467             }
468             else {
469                 // simple character, just write it out.
470                 out.write(ch);
471                 bytesWritten++;
472             }
473         }
474 
475         return bytesWritten;
476     }
477 
478 
479     /**
480      * decode the UUEncoded String data writing it to the given output stream.
481      *
482      * @param data   The String data to decode.
483      * @param out    The output stream to write the decoded data to.
484      *
485      * @return the number of bytes produced.
486      * @exception IOException
487      */
488     public int decode(String data, OutputStream out) throws IOException {
489         try {
490             // just get the byte data and decode.
491             byte[] bytes = data.getBytes("US-ASCII");
492             return decode(bytes, 0, bytes.length, out);
493         } catch (UnsupportedEncodingException e) {
494             throw new IOException("Invalid UUEncoding");
495         }
496     }
497 
498     private void checkLineLength(int required) throws IOException {
499         // if we're at our line length limit, write out a soft line break and reset.
500         if ((lineCount + required) > lineLength ) {
501             out.write('=');
502             out.write('\r');
503             out.write('\n');
504             bytesWritten += 3;
505             lineCount = 0;
506         }
507     }
508 
509 
510     public void writeEncodedCharacter(int ch) throws IOException {
511         // we need 3 characters for an encoded value
512         checkLineLength(3);
513         out.write('=');
514         out.write(encodingTable[ch >> 4]);
515         out.write(encodingTable[ch & 0x0F]);
516         lineCount += 3;
517         bytesWritten += 3;
518     }
519 
520 
521     public void writeCharacter(int ch) throws IOException {
522         // we need 3 characters for an encoded value
523         checkLineLength(1);
524         out.write(ch);
525         lineCount++;
526         bytesWritten++;
527     }
528 
529 
530     public void writeEOL() throws IOException {
531         out.write('\r');
532         out.write('\n');
533         lineCount = 0;
534         bytesWritten += 3;
535     }
536 
537 
538     public int decode(InputStream in) throws IOException {
539 
540         // we potentially need to scan over spans of whitespace characters to determine if they're real
541         // we just return blanks until the count goes to zero.
542         if (deferredWhitespace > 0) {
543             deferredWhitespace--;
544             return ' ';
545         }
546 
547         // we may have needed to scan ahead to find the first non-blank character, which we would store here.
548         // hand that back once we're done with the blanks.
549         if (cachedCharacter != -1) {
550             int result = cachedCharacter;
551             cachedCharacter = -1;
552             return result;
553         }
554 
555         int ch = in.read();
556 
557         // reflect back an EOF condition.
558         if (ch == -1) {
559             return -1;
560         }
561 
562         // space characters are a pain.  We need to scan ahead until we find a non-space character.
563         // if the character is a line terminator, we need to discard the blanks.
564         if (ch == ' ') {
565             // scan forward, counting the characters.
566             while ((ch = in.read()) == ' ') {
567                 deferredWhitespace++;
568             }
569 
570             // is this a lineend at the current location?
571             if (ch == -1 || ch == '\r' || ch == '\n') {
572                 // those blanks we so zealously counted up don't really exist.  Clear out the counter.
573                 deferredWhitespace = 0;
574                 // return the real significant character now.
575                 return ch;
576             }
577             else {
578             // remember this character for later, after we've used up the deferred blanks.
579                 cachedCharacter = ch;
580                 // return this space.  We did not include this one in the deferred count, so we're right in sync.
581                 return ' ';
582             }
583         }
584         else if (ch == '=') {
585             int b1 = in.read();
586             // we need to get two characters after the quotation marker
587             if (b1 == -1) {
588                 throw new IOException("Truncated quoted printable data");
589             }
590             int b2 = in.read();
591             // we need to get two characters after the quotation marker
592             if (b2 == -1) {
593                 throw new IOException("Truncated quoted printable data");
594             }
595 
596             // we've found an encoded carriage return.  The next char needs to be a newline
597             if (b1 == '\r') {
598                 if (b2 != '\n') {
599                     throw new IOException("Invalid quoted printable encoding");
600                 }
601                 // this was a soft linebreak inserted by the encoding.  We just toss this away
602                 // on decode.  We need to return something, so recurse and decode the next.
603                 return decode(in);
604             }
605             else {
606                 // this is a hex pair we need to convert back to a single byte.
607                 b1 = decodingTable[b1];
608                 b2 = decodingTable[b2];
609                 return (b1 << 4) | b2;
610             }
611         }
612         else {
613             return ch;
614         }
615     }
616 
617 
618     /**
619      * Perform RFC-2047 word encoding using Q-P data encoding.
620      *
621      * @param in       The source for the encoded data.
622      * @param charset  The charset tag to be added to each encoded data section.
623      * @param specials The set of special characters that we require to encoded.
624      * @param out      The output stream where the encoded data is to be written.
625      * @param fold     Controls whether separate sections of encoded data are separated by
626      *                 linebreaks or whitespace.
627      *
628      * @exception IOException
629      */
630     public void encodeWord(InputStream in, String charset, String specials, OutputStream out, boolean fold) throws IOException
631     {
632         // we need to scan ahead in a few places, which may require pushing characters back on to the stream.
633         // make sure we have a stream where this is possible.
634         PushbackInputStream inStream = new PushbackInputStream(in);
635         PrintStream writer = new PrintStream(out);
636 
637         // segments of encoded data are limited to 76 byes, including the control sections.
638         int limit = 76 - 7 - charset.length();
639         boolean firstLine = true;
640         StringBuffer encodedString = new StringBuffer(76);
641 
642         while (true) {
643 
644             // encode another segment of data.
645             encode(inStream, encodedString, specials, limit);
646             // nothing encoded means we've hit the end of the data.
647             if (encodedString.length() == 0) {
648                 break;
649             }
650             // if we have more than one segment, we need to insert separators.  Depending on whether folding
651             // was requested, this is either a blank or a linebreak.
652             if (!firstLine) {
653                 if (fold) {
654                     writer.print("\r\n");
655                 }
656                 else {
657                     writer.print(" ");
658                 }
659             }
660 
661             // add the encoded word header
662             writer.print("=?");
663             writer.print(charset);
664             writer.print("?Q?");
665             // the data
666             writer.print(encodedString.toString());
667             // and the terminator mark
668             writer.print("?=");
669             writer.flush();
670 
671             // we reset the string buffer and reuse it.
672             encodedString.setLength(0);
673         }
674     }
675 }
676 
677 
678