001    /**
002     *
003     * Copyright 2003-2006 The Apache Software Foundation
004     *
005     *  Licensed under the Apache License, Version 2.0 (the "License");
006     *  you may not use this file except in compliance with the License.
007     *  You may obtain a copy of the License at
008     *
009     *     http://www.apache.org/licenses/LICENSE-2.0
010     *
011     *  Unless required by applicable law or agreed to in writing, software
012     *  distributed under the License is distributed on an "AS IS" BASIS,
013     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     *  See the License for the specific language governing permissions and
015     *  limitations under the License.
016     */
017    
018    package org.apache.geronimo.mail.util;
019    
020    import java.io.EOFException;
021    import java.io.IOException;
022    import java.io.InputStream;
023    import java.io.OutputStream;
024    import java.io.PrintStream;
025    import java.io.PushbackInputStream;
026    import java.io.UnsupportedEncodingException;
027    
028    public class QuotedPrintableEncoder implements Encoder {
029    
030        static protected final byte[] encodingTable =
031        {
032            (byte)'0', (byte)'1', (byte)'2', (byte)'3', (byte)'4', (byte)'5', (byte)'6', (byte)'7',
033            (byte)'8', (byte)'9', (byte)'A', (byte)'B', (byte)'C', (byte)'D', (byte)'E', (byte)'F'
034        };
035    
036        /*
037         * set up the decoding table.
038         */
039        static protected final byte[] decodingTable = new byte[128];
040    
041        static {
042            // initialize the decoding table
043            for (int i = 0; i < encodingTable.length; i++)
044            {
045                decodingTable[encodingTable[i]] = (byte)i;
046            }
047        }
048    
049    
050        // default number of characters we will write per line.
051        static private final int DEFAULT_CHARS_PER_LINE = 76;
052    
053        // the output stream we're wrapped around
054        protected OutputStream out;
055        // the number of bytes written;
056        protected int bytesWritten = 0;
057        // number of bytes written on the current line
058        protected int lineCount = 0;
059        // line length we're dealing with
060        protected int lineLength;
061        // number of deferred whitespace characters in decode mode.
062        protected int deferredWhitespace = 0;
063    
064        protected int cachedCharacter = -1;
065    
066        // indicates whether the last character was a '\r', potentially part of a CRLF sequence.
067        protected boolean lastCR = false;
068        // remember whether last character was a white space.
069        protected boolean lastWhitespace = false;
070    
071        public QuotedPrintableEncoder() {
072            this(null, DEFAULT_CHARS_PER_LINE);
073        }
074    
075        public QuotedPrintableEncoder(OutputStream out) {
076            this(out, DEFAULT_CHARS_PER_LINE);
077        }
078    
079        public QuotedPrintableEncoder(OutputStream out, int lineLength) {
080            this.out = out;
081            this.lineLength = lineLength;
082        }
083    
084        private void checkDeferred(int ch) throws IOException {
085            // was the last character we looked at a whitespace?  Try to decide what to do with it now.
086            if (lastWhitespace) {
087                // if this whitespace is at the end of the line, write it out encoded
088                if (ch == '\r' || ch == '\n') {
089                    writeEncodedCharacter(' ');
090                }
091                else {
092                    // we can write this out without encoding.
093                    writeCharacter(' ');
094                }
095                // we always turn this off.
096                lastWhitespace = false;
097            }
098            // deferred carriage return?
099            else if (lastCR) {
100                // if the char following the CR was not a new line, write an EOL now.
101                if (ch != '\n') {
102                    writeEOL();
103                }
104                // we always turn this off too
105                lastCR = false;
106            }
107        }
108    
109    
110        /**
111         * encode the input data producing a UUEncoded output stream.
112         *
113         * @param data   The array of byte data.
114         * @param off    The starting offset within the data.
115         * @param length Length of the data to encode.
116         *
117         * @return the number of bytes produced.
118         */
119        public int encode(byte[] data, int off, int length) throws IOException {
120            int endOffset = off + length;
121    
122            while (off < endOffset) {
123                // get the character
124                byte ch = data[off++];
125    
126                // handle the encoding of this character.
127                encode(ch);
128            }
129    
130            return bytesWritten;
131        }
132    
133    
134        public void encode(int ch) throws IOException {
135            // make sure this is just a single byte value.
136            ch = ch &0xFF;
137    
138            // see if we had to defer handling of a whitespace or '\r' character, and handle it if necessary.
139            checkDeferred(ch);
140            // different characters require special handling.
141            switch (ch) {
142                // spaces require special handling.  If the next character is a line terminator, then
143                // the space needs to be encoded.
144                case ' ':
145                {
146                    // at this point, we don't know whether this needs encoding or not.  If the next
147                    // character is a linend, it gets encoded.  If anything else, we just write it as is.
148                    lastWhitespace = true;
149                    // turn off any CR flags.
150                    lastCR = false;
151                    break;
152                }
153    
154                // carriage return, which may be part of a CRLF sequence.
155                case '\r':
156                {
157                    // just flag this until we see the next character.
158                    lastCR = true;
159                    break;
160                }
161    
162                // a new line character...we need to check to see if it was paired up with a '\r' char.
163                case '\n':
164                {
165                    // we always write this out for a newline.  We defer CRs until we see if the LF follows.
166                    writeEOL();
167                    break;
168                }
169    
170                // an '=' is the escape character for an encoded character, so it must also
171                // be written encoded.
172                case '=':
173                {
174                    writeEncodedCharacter(ch);
175                    break;
176                }
177    
178                // all other characters.  If outside the printable character range, write it encoded.
179                default:
180                {
181                    if (ch < 32 || ch >= 127) {
182                        writeEncodedCharacter(ch);
183                    }
184                    else {
185                        writeCharacter(ch);
186                    }
187                    break;
188                }
189            }
190        }
191    
192    
193        /**
194         * encode the input data producing a UUEncoded output stream.
195         *
196         * @param data   The array of byte data.
197         * @param off    The starting offset within the data.
198         * @param length Length of the data to encode.
199         *
200         * @return the number of bytes produced.
201         */
202        public int encode(byte[] data, int off, int length, String specials) throws IOException {
203            int endOffset = off + length;
204    
205            while (off < endOffset) {
206                // get the character
207                byte ch = data[off++];
208    
209                // handle the encoding of this character.
210                encode(ch, specials);
211            }
212    
213            return bytesWritten;
214        }
215    
216    
217        /**
218         * encode the input data producing a UUEncoded output stream.
219         *
220         * @param data   The array of byte data.
221         * @param off    The starting offset within the data.
222         * @param length Length of the data to encode.
223         *
224         * @return the number of bytes produced.
225         */
226        public int encode(PushbackInputStream in, StringBuffer out, String specials, int limit) throws IOException {
227            int count = 0;
228    
229            while (count < limit) {
230                int ch = in.read();
231    
232                if (ch == -1) {
233                    return count;
234                }
235                // make sure this is just a single byte value.
236                ch = ch &0xFF;
237    
238                // spaces require special handling.  If the next character is a line terminator, then
239                // the space needs to be encoded.
240                if (ch == ' ') {
241                    // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
242                    out.append('_');
243                    count++;
244                }
245                // non-ascii chars and the designated specials all get encoded.
246                else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
247                    // we need at least 3 characters to write this out, so we need to
248                    // forget we saw this one and try in the next segment.
249                    if (count + 3 > limit) {
250                        in.unread(ch);
251                        return count;
252                    }
253                    out.append('=');
254                    out.append((char)encodingTable[ch >> 4]);
255                    out.append((char)encodingTable[ch & 0x0F]);
256                    count += 3;
257                }
258                else {
259                    // good character, just use unchanged.
260                    out.append((char)ch);
261                    count++;
262                }
263            }
264            return count;
265        }
266    
267    
268        /**
269         * Specialized version of the decoder that handles encoding of
270         * RFC 2047 encoded word values.  This has special handling for
271         * certain characters, but less special handling for blanks and
272         * linebreaks.
273         *
274         * @param ch
275         * @param specials
276         *
277         * @exception IOException
278         */
279        public void encode(int ch, String specials) throws IOException {
280            // make sure this is just a single byte value.
281            ch = ch &0xFF;
282    
283            // spaces require special handling.  If the next character is a line terminator, then
284            // the space needs to be encoded.
285            if (ch == ' ') {
286                // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
287                writeCharacter('_');
288            }
289            // non-ascii chars and the designated specials all get encoded.
290            else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
291                writeEncodedCharacter(ch);
292            }
293            else {
294                // good character, just use unchanged.
295                writeCharacter(ch);
296            }
297        }
298    
299    
300        /**
301         * encode the input data producing a UUEncoded output stream.
302         *
303         * @param data   The array of byte data.
304         * @param off    The starting offset within the data.
305         * @param length Length of the data to encode.
306         * @param out    The output stream the encoded data is written to.
307         *
308         * @return the number of bytes produced.
309         */
310        public int encode(byte[] data, int off, int length, OutputStream out) throws IOException {
311            // make sure we're writing to the correct stream
312            this.out = out;
313            bytesWritten = 0;
314    
315            // do the actual encoding
316            return encode(data, off, length);
317        }
318    
319    
320        /**
321         * decode the uuencoded byte data writing it to the given output stream
322         *
323         * @param data   The array of byte data to decode.
324         * @param off    Starting offset within the array.
325         * @param length The length of data to encode.
326         * @param out    The output stream used to return the decoded data.
327         *
328         * @return the number of bytes produced.
329         * @exception IOException
330         */
331        public int decode(byte[] data, int off, int length, OutputStream out) throws IOException {
332            // make sure we're writing to the correct stream
333            this.out = out;
334    
335            int endOffset = off + length;
336            int bytesWritten = 0;
337    
338            while (off < endOffset) {
339                byte ch = data[off++];
340    
341                // space characters are a pain.  We need to scan ahead until we find a non-space character.
342                // if the character is a line terminator, we need to discard the blanks.
343                if (ch == ' ') {
344                    int trailingSpaces = 1;
345                    // scan forward, counting the characters.
346                    while (off < endOffset && data[off] == ' ') {
347                        // step forward and count this.
348                        off++;
349                        trailingSpaces++;
350                    }
351                    // is this a lineend at the current location?
352                    if (off >= endOffset || data[off] == '\r' || data[off] == '\n') {
353                        // go to the next one
354                        continue;
355                    }
356                    else {
357                        // make sure we account for the spaces in the output count.
358                        bytesWritten += trailingSpaces;
359                        // write out the blank characters we counted and continue with the non-blank.
360                        while (trailingSpaces-- > 0) {
361                            out.write(' ');
362                        }
363                    }
364                }
365                else if (ch == '=') {
366                    // we found an encoded character.  Reduce the 3 char sequence to one.
367                    // but first, make sure we have two characters to work with.
368                    if (off + 1 >= endOffset) {
369                        throw new IOException("Invalid quoted printable encoding");
370                    }
371                    // convert the two bytes back from hex.
372                    byte b1 = data[off++];
373                    byte b2 = data[off++];
374    
375                    // we've found an encoded carriage return.  The next char needs to be a newline
376                    if (b1 == '\r') {
377                        if (b2 != '\n') {
378                            throw new IOException("Invalid quoted printable encoding");
379                        }
380                        // this was a soft linebreak inserted by the encoding.  We just toss this away
381                        // on decode.
382                    }
383                    else {
384                        // this is a hex pair we need to convert back to a single byte.
385                        b1 = decodingTable[b1];
386                        b2 = decodingTable[b2];
387                        out.write((b1 << 4) | b2);
388                        // 3 bytes in, one byte out
389                        bytesWritten++;
390                    }
391                }
392                else {
393                    // simple character, just write it out.
394                    out.write(ch);
395                    bytesWritten++;
396                }
397            }
398    
399            return bytesWritten;
400        }
401    
402        /**
403         * Decode a byte array of data.
404         *
405         * @param data   The data array.
406         * @param out    The output stream target for the decoded data.
407         *
408         * @return The number of bytes written to the stream.
409         * @exception IOException
410         */
411        public int decodeWord(byte[] data, OutputStream out) throws IOException {
412            return decodeWord(data, 0, data.length, out);
413        }
414    
415    
416        /**
417         * decode the uuencoded byte data writing it to the given output stream
418         *
419         * @param data   The array of byte data to decode.
420         * @param off    Starting offset within the array.
421         * @param length The length of data to encode.
422         * @param out    The output stream used to return the decoded data.
423         *
424         * @return the number of bytes produced.
425         * @exception IOException
426         */
427        public int decodeWord(byte[] data, int off, int length, OutputStream out) throws IOException {
428            // make sure we're writing to the correct stream
429            this.out = out;
430    
431            int endOffset = off + length;
432            int bytesWritten = 0;
433    
434            while (off < endOffset) {
435                byte ch = data[off++];
436    
437                // space characters were translated to '_' on encode, so we need to translate them back.
438                if (ch == '_') {
439                    out.write(' ');
440                }
441                else if (ch == '=') {
442                    // we found an encoded character.  Reduce the 3 char sequence to one.
443                    // but first, make sure we have two characters to work with.
444                    if (off + 1 >= endOffset) {
445                        throw new IOException("Invalid quoted printable encoding");
446                    }
447                    // convert the two bytes back from hex.
448                    byte b1 = data[off++];
449                    byte b2 = data[off++];
450    
451                    // we've found an encoded carriage return.  The next char needs to be a newline
452                    if (b1 == '\r') {
453                        if (b2 != '\n') {
454                            throw new IOException("Invalid quoted printable encoding");
455                        }
456                        // this was a soft linebreak inserted by the encoding.  We just toss this away
457                        // on decode.
458                    }
459                    else {
460                        // this is a hex pair we need to convert back to a single byte.
461                        byte c1 = decodingTable[b1];
462                        byte c2 = decodingTable[b2];
463                        out.write((c1 << 4) | c2);
464                        // 3 bytes in, one byte out
465                        bytesWritten++;
466                    }
467                }
468                else {
469                    // simple character, just write it out.
470                    out.write(ch);
471                    bytesWritten++;
472                }
473            }
474    
475            return bytesWritten;
476        }
477    
478    
479        /**
480         * decode the UUEncoded String data writing it to the given output stream.
481         *
482         * @param data   The String data to decode.
483         * @param out    The output stream to write the decoded data to.
484         *
485         * @return the number of bytes produced.
486         * @exception IOException
487         */
488        public int decode(String data, OutputStream out) throws IOException {
489            try {
490                // just get the byte data and decode.
491                byte[] bytes = data.getBytes("US-ASCII");
492                return decode(bytes, 0, bytes.length, out);
493            } catch (UnsupportedEncodingException e) {
494                throw new IOException("Invalid UUEncoding");
495            }
496        }
497    
498        private void checkLineLength(int required) throws IOException {
499            // if we're at our line length limit, write out a soft line break and reset.
500            if ((lineCount + required) > lineLength ) {
501                out.write('=');
502                out.write('\r');
503                out.write('\n');
504                bytesWritten += 3;
505                lineCount = 0;
506            }
507        }
508    
509    
510        public void writeEncodedCharacter(int ch) throws IOException {
511            // we need 3 characters for an encoded value
512            checkLineLength(3);
513            out.write('=');
514            out.write(encodingTable[ch >> 4]);
515            out.write(encodingTable[ch & 0x0F]);
516            lineCount += 3;
517            bytesWritten += 3;
518        }
519    
520    
521        public void writeCharacter(int ch) throws IOException {
522            // we need 3 characters for an encoded value
523            checkLineLength(1);
524            out.write(ch);
525            lineCount++;
526            bytesWritten++;
527        }
528    
529    
530        public void writeEOL() throws IOException {
531            out.write('\r');
532            out.write('\n');
533            lineCount = 0;
534            bytesWritten += 3;
535        }
536    
537    
538        public int decode(InputStream in) throws IOException {
539    
540            // we potentially need to scan over spans of whitespace characters to determine if they're real
541            // we just return blanks until the count goes to zero.
542            if (deferredWhitespace > 0) {
543                deferredWhitespace--;
544                return ' ';
545            }
546    
547            // we may have needed to scan ahead to find the first non-blank character, which we would store here.
548            // hand that back once we're done with the blanks.
549            if (cachedCharacter != -1) {
550                int result = cachedCharacter;
551                cachedCharacter = -1;
552                return result;
553            }
554    
555            int ch = in.read();
556    
557            // reflect back an EOF condition.
558            if (ch == -1) {
559                return -1;
560            }
561    
562            // space characters are a pain.  We need to scan ahead until we find a non-space character.
563            // if the character is a line terminator, we need to discard the blanks.
564            if (ch == ' ') {
565                // scan forward, counting the characters.
566                while ((ch = in.read()) == ' ') {
567                    deferredWhitespace++;
568                }
569    
570                // is this a lineend at the current location?
571                if (ch == -1 || ch == '\r' || ch == '\n') {
572                    // those blanks we so zealously counted up don't really exist.  Clear out the counter.
573                    deferredWhitespace = 0;
574                    // return the real significant character now.
575                    return ch;
576                }
577                else {
578                // remember this character for later, after we've used up the deferred blanks.
579                    cachedCharacter = ch;
580                    // return this space.  We did not include this one in the deferred count, so we're right in sync.
581                    return ' ';
582                }
583            }
584            else if (ch == '=') {
585                int b1 = in.read();
586                // we need to get two characters after the quotation marker
587                if (b1 == -1) {
588                    throw new IOException("Truncated quoted printable data");
589                }
590                int b2 = in.read();
591                // we need to get two characters after the quotation marker
592                if (b2 == -1) {
593                    throw new IOException("Truncated quoted printable data");
594                }
595    
596                // we've found an encoded carriage return.  The next char needs to be a newline
597                if (b1 == '\r') {
598                    if (b2 != '\n') {
599                        throw new IOException("Invalid quoted printable encoding");
600                    }
601                    // this was a soft linebreak inserted by the encoding.  We just toss this away
602                    // on decode.  We need to return something, so recurse and decode the next.
603                    return decode(in);
604                }
605                else {
606                    // this is a hex pair we need to convert back to a single byte.
607                    b1 = decodingTable[b1];
608                    b2 = decodingTable[b2];
609                    return (b1 << 4) | b2;
610                }
611            }
612            else {
613                return ch;
614            }
615        }
616    
617    
618        /**
619         * Perform RFC-2047 word encoding using Q-P data encoding.
620         *
621         * @param in       The source for the encoded data.
622         * @param charset  The charset tag to be added to each encoded data section.
623         * @param specials The set of special characters that we require to encoded.
624         * @param out      The output stream where the encoded data is to be written.
625         * @param fold     Controls whether separate sections of encoded data are separated by
626         *                 linebreaks or whitespace.
627         *
628         * @exception IOException
629         */
630        public void encodeWord(InputStream in, String charset, String specials, OutputStream out, boolean fold) throws IOException
631        {
632            // we need to scan ahead in a few places, which may require pushing characters back on to the stream.
633            // make sure we have a stream where this is possible.
634            PushbackInputStream inStream = new PushbackInputStream(in);
635            PrintStream writer = new PrintStream(out);
636    
637            // segments of encoded data are limited to 76 byes, including the control sections.
638            int limit = 76 - 7 - charset.length();
639            boolean firstLine = true;
640            StringBuffer encodedString = new StringBuffer(76);
641    
642            while (true) {
643    
644                // encode another segment of data.
645                encode(inStream, encodedString, specials, limit);
646                // nothing encoded means we've hit the end of the data.
647                if (encodedString.length() == 0) {
648                    break;
649                }
650                // if we have more than one segment, we need to insert separators.  Depending on whether folding
651                // was requested, this is either a blank or a linebreak.
652                if (!firstLine) {
653                    if (fold) {
654                        writer.print("\r\n");
655                    }
656                    else {
657                        writer.print(" ");
658                    }
659                }
660    
661                // add the encoded word header
662                writer.print("=?");
663                writer.print(charset);
664                writer.print("?Q?");
665                // the data
666                writer.print(encodedString.toString());
667                // and the terminator mark
668                writer.print("?=");
669                writer.flush();
670    
671                // we reset the string buffer and reuse it.
672                encodedString.setLength(0);
673            }
674        }
675    }
676    
677    
678