QuotedPrintableEncoder xref

View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *  http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  
20  package org.apache.geronimo.mail.util;
21  
22  import java.io.EOFException;
23  import java.io.IOException;
24  import java.io.InputStream;
25  import java.io.OutputStream;
26  import java.io.PrintStream;
27  import java.io.PushbackInputStream;
28  import java.io.UnsupportedEncodingException;
29  
30  public class QuotedPrintableEncoder implements Encoder {
31  
32      static protected final byte[] encodingTable =
33      {
34          (byte)'0', (byte)'1', (byte)'2', (byte)'3', (byte)'4', (byte)'5', (byte)'6', (byte)'7',
35          (byte)'8', (byte)'9', (byte)'A', (byte)'B', (byte)'C', (byte)'D', (byte)'E', (byte)'F'
36      };
37  
38      /*
39       * set up the decoding table.
40       */
41      static protected final byte[] decodingTable = new byte[128];
42  
43      static {
44          // initialize the decoding table
45          for (int i = 0; i < encodingTable.length; i++)
46          {
47              decodingTable[encodingTable[i]] = (byte)i;
48          }
49      }
50  
51  
52      // default number of characters we will write per line.
53      static private final int DEFAULT_CHARS_PER_LINE = 76;
54  
55      // the output stream we're wrapped around
56      protected OutputStream out;
57      // the number of bytes written;
58      protected int bytesWritten = 0;
59      // number of bytes written on the current line
60      protected int lineCount = 0;
61      // line length we're dealing with
62      protected int lineLength;
63      // number of deferred whitespace characters in decode mode.
64      protected int deferredWhitespace = 0;
65  
66      protected int cachedCharacter = -1;
67  
68      // indicates whether the last character was a '\r', potentially part of a CRLF sequence.
69      protected boolean lastCR = false;
70      // remember whether last character was a white space.
71      protected boolean lastWhitespace = false;
72  
73      public QuotedPrintableEncoder() {
74          this(null, DEFAULT_CHARS_PER_LINE);
75      }
76  
77      public QuotedPrintableEncoder(OutputStream out) {
78          this(out, DEFAULT_CHARS_PER_LINE);
79      }
80  
81      public QuotedPrintableEncoder(OutputStream out, int lineLength) {
82          this.out = out;
83          this.lineLength = lineLength;
84      }
85  
86      private void checkDeferred(int ch) throws IOException {
87          // was the last character we looked at a whitespace?  Try to decide what to do with it now.
88          if (lastWhitespace) {
89              // if this whitespace is at the end of the line, write it out encoded
90              if (ch == '\r' || ch == '\n') {
91                  writeEncodedCharacter(' ');
92              }
93              else {
94                  // we can write this out without encoding.
95                  writeCharacter(' ');
96              }
97              // we always turn this off.
98              lastWhitespace = false;
99          }
100         // deferred carriage return?
101         else if (lastCR) {
102             // if the char following the CR was not a new line, write an EOL now.
103             if (ch != '\n') {
104                 writeEOL();
105             }
106             // we always turn this off too
107             lastCR = false;
108         }
109     }
110 
111 
112     /**
113      * encode the input data producing a UUEncoded output stream.
114      *
115      * @param data   The array of byte data.
116      * @param off    The starting offset within the data.
117      * @param length Length of the data to encode.
118      *
119      * @return the number of bytes produced.
120      */
121     public int encode(byte[] data, int off, int length) throws IOException {
122         int endOffset = off + length;
123 
124         while (off < endOffset) {
125             // get the character
126             byte ch = data[off++];
127 
128             // handle the encoding of this character.
129             encode(ch);
130         }
131 
132         return bytesWritten;
133     }
134 
135 
136     public void encode(int ch) throws IOException {
137         // make sure this is just a single byte value.
138         ch = ch &0xFF;
139 
140         // see if we had to defer handling of a whitespace or '\r' character, and handle it if necessary.
141         checkDeferred(ch);
142         // different characters require special handling.
143         switch (ch) {
144             // spaces require special handling.  If the next character is a line terminator, then
145             // the space needs to be encoded.
146             case ' ':
147             {
148                 // at this point, we don't know whether this needs encoding or not.  If the next
149                 // character is a linend, it gets encoded.  If anything else, we just write it as is.
150                 lastWhitespace = true;
151                 // turn off any CR flags.
152                 lastCR = false;
153                 break;
154             }
155 
156             // carriage return, which may be part of a CRLF sequence.
157             case '\r':
158             {
159                 // just flag this until we see the next character.
160                 lastCR = true;
161                 break;
162             }
163 
164             // a new line character...we need to check to see if it was paired up with a '\r' char.
165             case '\n':
166             {
167                 // we always write this out for a newline.  We defer CRs until we see if the LF follows.
168                 writeEOL();
169                 break;
170             }
171 
172             // an '=' is the escape character for an encoded character, so it must also
173             // be written encoded.
174             case '=':
175             {
176                 writeEncodedCharacter(ch);
177                 break;
178             }
179 
180             // all other characters.  If outside the printable character range, write it encoded.
181             default:
182             {
183                 if (ch < 32 || ch >= 127) {
184                     writeEncodedCharacter(ch);
185                 }
186                 else {
187                     writeCharacter(ch);
188                 }
189                 break;
190             }
191         }
192     }
193 
194 
195     /**
196      * encode the input data producing a UUEncoded output stream.
197      *
198      * @param data   The array of byte data.
199      * @param off    The starting offset within the data.
200      * @param length Length of the data to encode.
201      *
202      * @return the number of bytes produced.
203      */
204     public int encode(byte[] data, int off, int length, String specials) throws IOException {
205         int endOffset = off + length;
206 
207         while (off < endOffset) {
208             // get the character
209             byte ch = data[off++];
210 
211             // handle the encoding of this character.
212             encode(ch, specials);
213         }
214 
215         return bytesWritten;
216     }
217 
218 
219     /**
220      * encode the input data producing a UUEncoded output stream.
221      *
222      * @param data   The array of byte data.
223      * @param off    The starting offset within the data.
224      * @param length Length of the data to encode.
225      *
226      * @return the number of bytes produced.
227      */
228     public int encode(PushbackInputStream in, StringBuffer out, String specials, int limit) throws IOException {
229         int count = 0;
230 
231         while (count < limit) {
232             int ch = in.read();
233 
234             if (ch == -1) {
235                 return count;
236             }
237             // make sure this is just a single byte value.
238             ch = ch &0xFF;
239 
240             // spaces require special handling.  If the next character is a line terminator, then
241             // the space needs to be encoded.
242             if (ch == ' ') {
243                 // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
244                 out.append('_');
245                 count++;
246             }
247             // non-ascii chars and the designated specials all get encoded.
248             else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
249                 // we need at least 3 characters to write this out, so we need to
250                 // forget we saw this one and try in the next segment.
251                 if (count + 3 > limit) {
252                     in.unread(ch);
253                     return count;
254                 }
255                 out.append('=');
256                 out.append((char)encodingTable[ch >> 4]);
257                 out.append((char)encodingTable[ch & 0x0F]);
258                 count += 3;
259             }
260             else {
261                 // good character, just use unchanged.
262                 out.append((char)ch);
263                 count++;
264             }
265         }
266         return count;
267     }
268 
269 
270     /**
271      * Specialized version of the decoder that handles encoding of
272      * RFC 2047 encoded word values.  This has special handling for
273      * certain characters, but less special handling for blanks and
274      * linebreaks.
275      *
276      * @param ch
277      * @param specials
278      *
279      * @exception IOException
280      */
281     public void encode(int ch, String specials) throws IOException {
282         // make sure this is just a single byte value.
283         ch = ch &0xFF;
284 
285         // spaces require special handling.  If the next character is a line terminator, then
286         // the space needs to be encoded.
287         if (ch == ' ') {
288             // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
289             writeCharacter('_');
290         }
291         // non-ascii chars and the designated specials all get encoded.
292         else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
293             writeEncodedCharacter(ch);
294         }
295         else {
296             // good character, just use unchanged.
297             writeCharacter(ch);
298         }
299     }
300 
301 
302     /**
303      * encode the input data producing a UUEncoded output stream.
304      *
305      * @param data   The array of byte data.
306      * @param off    The starting offset within the data.
307      * @param length Length of the data to encode.
308      * @param out    The output stream the encoded data is written to.
309      *
310      * @return the number of bytes produced.
311      */
312     public int encode(byte[] data, int off, int length, OutputStream out) throws IOException {
313         // make sure we're writing to the correct stream
314         this.out = out;
315         bytesWritten = 0;
316 
317         // do the actual encoding
318         return encode(data, off, length);
319     }
320 
321 
322     /**
323      * decode the uuencoded byte data writing it to the given output stream
324      *
325      * @param data   The array of byte data to decode.
326      * @param off    Starting offset within the array.
327      * @param length The length of data to encode.
328      * @param out    The output stream used to return the decoded data.
329      *
330      * @return the number of bytes produced.
331      * @exception IOException
332      */
333     public int decode(byte[] data, int off, int length, OutputStream out) throws IOException {
334         // make sure we're writing to the correct stream
335         this.out = out;
336 
337         int endOffset = off + length;
338         int bytesWritten = 0;
339 
340         while (off < endOffset) {
341             byte ch = data[off++];
342 
343             // space characters are a pain.  We need to scan ahead until we find a non-space character.
344             // if the character is a line terminator, we need to discard the blanks.
345             if (ch == ' ') {
346                 int trailingSpaces = 1;
347                 // scan forward, counting the characters.
348                 while (off < endOffset && data[off] == ' ') {
349                     // step forward and count this.
350                     off++;
351                     trailingSpaces++;
352                 }
353                 // is this a lineend at the current location?
354                 if (off >= endOffset || data[off] == '\r' || data[off] == '\n') {
355                     // go to the next one
356                     continue;
357                 }
358                 else {
359                     // make sure we account for the spaces in the output count.
360                     bytesWritten += trailingSpaces;
361                     // write out the blank characters we counted and continue with the non-blank.
362                     while (trailingSpaces-- > 0) {
363                         out.write(' ');
364                     }
365                 }
366             }
367             else if (ch == '=') {
368                 // we found an encoded character.  Reduce the 3 char sequence to one.
369                 // but first, make sure we have two characters to work with.
370                 if (off + 1 >= endOffset) {
371                     throw new IOException("Invalid quoted printable encoding");
372                 }
373                 // convert the two bytes back from hex.
374                 byte b1 = data[off++];
375                 byte b2 = data[off++];
376 
377                 // we've found an encoded carriage return.  The next char needs to be a newline
378                 if (b1 == '\r') {
379                     if (b2 != '\n') {
380                         throw new IOException("Invalid quoted printable encoding");
381                     }
382                     // this was a soft linebreak inserted by the encoding.  We just toss this away
383                     // on decode.
384                 }
385                 else {
386                     // this is a hex pair we need to convert back to a single byte.
387                     b1 = decodingTable[b1];
388                     b2 = decodingTable[b2];
389                     out.write((b1 << 4) | b2);
390                     // 3 bytes in, one byte out
391                     bytesWritten++;
392                 }
393             }
394             else {
395                 // simple character, just write it out.
396                 out.write(ch);
397                 bytesWritten++;
398             }
399         }
400 
401         return bytesWritten;
402     }
403 
404     /**
405      * Decode a byte array of data.
406      *
407      * @param data   The data array.
408      * @param out    The output stream target for the decoded data.
409      *
410      * @return The number of bytes written to the stream.
411      * @exception IOException
412      */
413     public int decodeWord(byte[] data, OutputStream out) throws IOException {
414         return decodeWord(data, 0, data.length, out);
415     }
416 
417 
418     /**
419      * decode the uuencoded byte data writing it to the given output stream
420      *
421      * @param data   The array of byte data to decode.
422      * @param off    Starting offset within the array.
423      * @param length The length of data to encode.
424      * @param out    The output stream used to return the decoded data.
425      *
426      * @return the number of bytes produced.
427      * @exception IOException
428      */
429     public int decodeWord(byte[] data, int off, int length, OutputStream out) throws IOException {
430         // make sure we're writing to the correct stream
431         this.out = out;
432 
433         int endOffset = off + length;
434         int bytesWritten = 0;
435 
436         while (off < endOffset) {
437             byte ch = data[off++];
438 
439             // space characters were translated to '_' on encode, so we need to translate them back.
440             if (ch == '_') {
441                 out.write(' ');
442             }
443             else if (ch == '=') {
444                 // we found an encoded character.  Reduce the 3 char sequence to one.
445                 // but first, make sure we have two characters to work with.
446                 if (off + 1 >= endOffset) {
447                     throw new IOException("Invalid quoted printable encoding");
448                 }
449                 // convert the two bytes back from hex.
450                 byte b1 = data[off++];
451                 byte b2 = data[off++];
452 
453                 // we've found an encoded carriage return.  The next char needs to be a newline
454                 if (b1 == '\r') {
455                     if (b2 != '\n') {
456                         throw new IOException("Invalid quoted printable encoding");
457                     }
458                     // this was a soft linebreak inserted by the encoding.  We just toss this away
459                     // on decode.
460                 }
461                 else {
462                     // this is a hex pair we need to convert back to a single byte.
463                     byte c1 = decodingTable[b1];
464                     byte c2 = decodingTable[b2];
465                     out.write((c1 << 4) | c2);
466                     // 3 bytes in, one byte out
467                     bytesWritten++;
468                 }
469             }
470             else {
471                 // simple character, just write it out.
472                 out.write(ch);
473                 bytesWritten++;
474             }
475         }
476 
477         return bytesWritten;
478     }
479 
480 
481     /**
482      * decode the UUEncoded String data writing it to the given output stream.
483      *
484      * @param data   The String data to decode.
485      * @param out    The output stream to write the decoded data to.
486      *
487      * @return the number of bytes produced.
488      * @exception IOException
489      */
490     public int decode(String data, OutputStream out) throws IOException {
491         try {
492             // just get the byte data and decode.
493             byte[] bytes = data.getBytes("US-ASCII");
494             return decode(bytes, 0, bytes.length, out);
495         } catch (UnsupportedEncodingException e) {
496             throw new IOException("Invalid UUEncoding");
497         }
498     }
499 
500     private void checkLineLength(int required) throws IOException {
501         // if we're at our line length limit, write out a soft line break and reset.
502         if ((lineCount + required) >= lineLength ) {
503             out.write('=');
504             out.write('\r');
505             out.write('\n');
506             bytesWritten += 3;
507             lineCount = 0;
508         }
509     }
510 
511 
512     public void writeEncodedCharacter(int ch) throws IOException {
513         // we need 3 characters for an encoded value
514         checkLineLength(3);
515         out.write('=');
516         out.write(encodingTable[ch >> 4]);
517         out.write(encodingTable[ch & 0x0F]);
518         lineCount += 3;
519         bytesWritten += 3;
520     }
521 
522 
523     public void writeCharacter(int ch) throws IOException {
524         // we need 3 characters for an encoded value
525         checkLineLength(1);
526         out.write(ch);
527         lineCount++;
528         bytesWritten++;
529     }
530 
531 
532     public void writeEOL() throws IOException {
533         out.write('\r');
534         out.write('\n');
535         lineCount = 0;
536         bytesWritten += 3;
537     }
538 
539 
540     public int decode(InputStream in) throws IOException {
541 
542         // we potentially need to scan over spans of whitespace characters to determine if they're real
543         // we just return blanks until the count goes to zero.
544         if (deferredWhitespace > 0) {
545             deferredWhitespace--;
546             return ' ';
547         }
548 
549         // we may have needed to scan ahead to find the first non-blank character, which we would store here.
550         // hand that back once we're done with the blanks.
551         if (cachedCharacter != -1) {
552             int result = cachedCharacter;
553             cachedCharacter = -1;
554             return result;
555         }
556 
557         int ch = in.read();
558 
559         // reflect back an EOF condition.
560         if (ch == -1) {
561             return -1;
562         }
563 
564         // space characters are a pain.  We need to scan ahead until we find a non-space character.
565         // if the character is a line terminator, we need to discard the blanks.
566         if (ch == ' ') {
567             // scan forward, counting the characters.
568             while ((ch = in.read()) == ' ') {
569                 deferredWhitespace++;
570             }
571 
572             // is this a lineend at the current location?
573             if (ch == -1 || ch == '\r' || ch == '\n') {
574                 // those blanks we so zealously counted up don't really exist.  Clear out the counter.
575                 deferredWhitespace = 0;
576                 // return the real significant character now.
577                 return ch;
578             }
579                        // remember this character for later, after we've used up the deferred blanks.
580             cachedCharacter = decodeNonspaceChar(in, ch);
581             // return this space.  We did not include this one in the deferred count, so we're right in sync.
582             return ' ';
583         }
584         return decodeNonspaceChar(in, ch);
585     }
586 
587        private int decodeNonspaceChar(InputStream in, int ch) throws IOException {
588                if (ch == '=') {
589             int b1 = in.read();
590             // we need to get two characters after the quotation marker
591             if (b1 == -1) {
592                 throw new IOException("Truncated quoted printable data");
593             }
594             int b2 = in.read();
595             // we need to get two characters after the quotation marker
596             if (b2 == -1) {
597                 throw new IOException("Truncated quoted printable data");
598             }
599 
600             // we've found an encoded carriage return.  The next char needs to be a newline
601             if (b1 == '\r') {
602                 if (b2 != '\n') {
603                     throw new IOException("Invalid quoted printable encoding");
604                 }
605                 // this was a soft linebreak inserted by the encoding.  We just toss this away
606                 // on decode.  We need to return something, so recurse and decode the next.
607                 return decode(in);
608             }
609             else {
610                 // this is a hex pair we need to convert back to a single byte.
611                 b1 = decodingTable[b1];
612                 b2 = decodingTable[b2];
613                 return (b1 << 4) | b2;
614             }
615         }
616         else {
617             return ch;
618         }
619     }
620 
621 
622     /**
623      * Perform RFC-2047 word encoding using Q-P data encoding.
624      *
625      * @param in       The source for the encoded data.
626      * @param charset  The charset tag to be added to each encoded data section.
627      * @param specials The set of special characters that we require to encoded.
628      * @param out      The output stream where the encoded data is to be written.
629      * @param fold     Controls whether separate sections of encoded data are separated by
630      *                 linebreaks or whitespace.
631      *
632      * @exception IOException
633      */
634     public void encodeWord(InputStream in, String charset, String specials, OutputStream out, boolean fold) throws IOException
635     {
636         // we need to scan ahead in a few places, which may require pushing characters back on to the stream.
637         // make sure we have a stream where this is possible.
638         PushbackInputStream inStream = new PushbackInputStream(in);
639         PrintStream writer = new PrintStream(out);
640 
641         // segments of encoded data are limited to 75 byes, including the control sections.
642         int limit = 75 - 7 - charset.length();
643         boolean firstLine = true;
644         StringBuffer encodedString = new StringBuffer(76);
645 
646         while (true) {
647 
648             // encode another segment of data.
649             encode(inStream, encodedString, specials, limit);
650             // nothing encoded means we've hit the end of the data.
651             if (encodedString.length() == 0) {
652                 break;
653             }
654             // if we have more than one segment, we need to insert separators.  Depending on whether folding
655             // was requested, this is either a blank or a linebreak.
656             if (!firstLine) {
657                 if (fold) {
658                     writer.print("\r\n");
659                 }
660                 else {
661                     writer.print(" ");
662                 }
663             }
664 
665             // add the encoded word header
666             writer.print("=?");
667             writer.print(charset);
668             writer.print("?Q?");
669             // the data
670             writer.print(encodedString.toString());
671             // and the terminator mark
672             writer.print("?=");
673             writer.flush();
674 
675             // we reset the string buffer and reuse it.
676             encodedString.setLength(0);
677             // we need a delimiter between sections from this point on. 
678             firstLine = false;
679         }
680     }
681 
682 
683     /**
684      * Perform RFC-2047 word encoding using Base64 data encoding.
685      *
686      * @param in      The source for the encoded data.
687      * @param charset The charset tag to be added to each encoded data section.
688      * @param out     The output stream where the encoded data is to be written.
689      * @param fold    Controls whether separate sections of encoded data are separated by
690      *                linebreaks or whitespace.
691      *
692      * @exception IOException
693      */
694     public void encodeWord(byte[] data, StringBuffer out, String charset, String specials) throws IOException
695     {
696         // append the word header 
697         out.append("=?");
698         out.append(charset);
699         out.append("?Q?"); 
700         // add on the encodeded data       
701         encodeWordData(data, out, specials); 
702         // the end of the encoding marker 
703         out.append("?="); 
704     }
705 
706 
707     /**
708      * Perform RFC-2047 word encoding using Q-P data encoding.
709      *
710      * @param in       The source for the encoded data.
711      * @param charset  The charset tag to be added to each encoded data section.
712      * @param specials The set of special characters that we require to encoded.
713      * @param out      The output stream where the encoded data is to be written.
714      * @param fold     Controls whether separate sections of encoded data are separated by
715      *                 linebreaks or whitespace.
716      *
717      * @exception IOException
718      */
719     public void encodeWordData(byte[] data, StringBuffer out, String specials) throws IOException {
720         for (int i = 0; i < data.length; i++) {
721             int ch = data[i] & 0xff; ; 
722 
723             // spaces require special handling.  If the next character is a line terminator, then
724             // the space needs to be encoded.
725             if (ch == ' ') {
726                 // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
727                 out.append('_');
728             }
729             // non-ascii chars and the designated specials all get encoded.
730             else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
731                 out.append('=');
732                 out.append((char)encodingTable[ch >> 4]);
733                 out.append((char)encodingTable[ch & 0x0F]);
734             }
735             else {
736                 // good character, just use unchanged.
737                 out.append((char)ch);
738             }
739         }
740     }
741     
742     
743     /**
744      * Estimate the final encoded size of a segment of data. 
745      * This is used to ensure that the encoded blocks do 
746      * not get split across a unicode character boundary and 
747      * that the encoding will fit within the bounds of 
748      * a mail header line. 
749      * 
750      * @param data   The data we're anticipating encoding.
751      * 
752      * @return The size of the byte data in encoded form. 
753      */
754     public int estimateEncodedLength(byte[] data, String specials) 
755     {
756         int count = 0; 
757         
758         for (int i = 0; i < data.length; i++) {
759             // make sure this is just a single byte value.
760             int  ch = data[i] & 0xff;
761 
762             // non-ascii chars and the designated specials all get encoded.
763             if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
764                 // Q encoding translates a single char into 3 characters 
765                 count += 3; 
766             }
767             else {
768                 // non-encoded character 
769                 count++;
770             }
771         }
772         return count; 
773     }
774 }
775 
776 
777