001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *  http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing,
013     * software distributed under the License is distributed on an
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     * KIND, either express or implied.  See the License for the
016     * specific language governing permissions and limitations
017     * under the License.
018     */
019    
020    package org.apache.geronimo.mail.util;
021    
022    import java.io.EOFException;
023    import java.io.IOException;
024    import java.io.InputStream;
025    import java.io.OutputStream;
026    import java.io.PrintStream;
027    import java.io.PushbackInputStream;
028    import java.io.UnsupportedEncodingException;
029    
030    public class QuotedPrintableEncoder implements Encoder {
031    
032        static protected final byte[] encodingTable =
033        {
034            (byte)'0', (byte)'1', (byte)'2', (byte)'3', (byte)'4', (byte)'5', (byte)'6', (byte)'7',
035            (byte)'8', (byte)'9', (byte)'A', (byte)'B', (byte)'C', (byte)'D', (byte)'E', (byte)'F'
036        };
037    
038        /*
039         * set up the decoding table.
040         */
041        static protected final byte[] decodingTable = new byte[128];
042    
043        static {
044            // initialize the decoding table
045            for (int i = 0; i < encodingTable.length; i++)
046            {
047                decodingTable[encodingTable[i]] = (byte)i;
048            }
049        }
050    
051    
052        // default number of characters we will write per line.
053        static private final int DEFAULT_CHARS_PER_LINE = 76;
054    
055        // the output stream we're wrapped around
056        protected OutputStream out;
057        // the number of bytes written;
058        protected int bytesWritten = 0;
059        // number of bytes written on the current line
060        protected int lineCount = 0;
061        // line length we're dealing with
062        protected int lineLength;
063        // number of deferred whitespace characters in decode mode.
064        protected int deferredWhitespace = 0;
065    
066        protected int cachedCharacter = -1;
067    
068        // indicates whether the last character was a '\r', potentially part of a CRLF sequence.
069        protected boolean lastCR = false;
070        // remember whether last character was a white space.
071        protected boolean lastWhitespace = false;
072    
073        public QuotedPrintableEncoder() {
074            this(null, DEFAULT_CHARS_PER_LINE);
075        }
076    
077        public QuotedPrintableEncoder(OutputStream out) {
078            this(out, DEFAULT_CHARS_PER_LINE);
079        }
080    
081        public QuotedPrintableEncoder(OutputStream out, int lineLength) {
082            this.out = out;
083            this.lineLength = lineLength;
084        }
085    
086        private void checkDeferred(int ch) throws IOException {
087            // was the last character we looked at a whitespace?  Try to decide what to do with it now.
088            if (lastWhitespace) {
089                // if this whitespace is at the end of the line, write it out encoded
090                if (ch == '\r' || ch == '\n') {
091                    writeEncodedCharacter(' ');
092                }
093                else {
094                    // we can write this out without encoding.
095                    writeCharacter(' ');
096                }
097                // we always turn this off.
098                lastWhitespace = false;
099            }
100            // deferred carriage return?
101            else if (lastCR) {
102                // if the char following the CR was not a new line, write an EOL now.
103                if (ch != '\n') {
104                    writeEOL();
105                }
106                // we always turn this off too
107                lastCR = false;
108            }
109        }
110    
111    
112        /**
113         * encode the input data producing a UUEncoded output stream.
114         *
115         * @param data   The array of byte data.
116         * @param off    The starting offset within the data.
117         * @param length Length of the data to encode.
118         *
119         * @return the number of bytes produced.
120         */
121        public int encode(byte[] data, int off, int length) throws IOException {
122            int endOffset = off + length;
123    
124            while (off < endOffset) {
125                // get the character
126                byte ch = data[off++];
127    
128                // handle the encoding of this character.
129                encode(ch);
130            }
131    
132            return bytesWritten;
133        }
134    
135    
136        public void encode(int ch) throws IOException {
137            // make sure this is just a single byte value.
138            ch = ch &0xFF;
139    
140            // see if we had to defer handling of a whitespace or '\r' character, and handle it if necessary.
141            checkDeferred(ch);
142            // different characters require special handling.
143            switch (ch) {
144                // spaces require special handling.  If the next character is a line terminator, then
145                // the space needs to be encoded.
146                case ' ':
147                {
148                    // at this point, we don't know whether this needs encoding or not.  If the next
149                    // character is a linend, it gets encoded.  If anything else, we just write it as is.
150                    lastWhitespace = true;
151                    // turn off any CR flags.
152                    lastCR = false;
153                    break;
154                }
155    
156                // carriage return, which may be part of a CRLF sequence.
157                case '\r':
158                {
159                    // just flag this until we see the next character.
160                    lastCR = true;
161                    break;
162                }
163    
164                // a new line character...we need to check to see if it was paired up with a '\r' char.
165                case '\n':
166                {
167                    // we always write this out for a newline.  We defer CRs until we see if the LF follows.
168                    writeEOL();
169                    break;
170                }
171    
172                // an '=' is the escape character for an encoded character, so it must also
173                // be written encoded.
174                case '=':
175                {
176                    writeEncodedCharacter(ch);
177                    break;
178                }
179    
180                // all other characters.  If outside the printable character range, write it encoded.
181                default:
182                {
183                    if (ch < 32 || ch >= 127) {
184                        writeEncodedCharacter(ch);
185                    }
186                    else {
187                        writeCharacter(ch);
188                    }
189                    break;
190                }
191            }
192        }
193    
194    
195        /**
196         * encode the input data producing a UUEncoded output stream.
197         *
198         * @param data   The array of byte data.
199         * @param off    The starting offset within the data.
200         * @param length Length of the data to encode.
201         *
202         * @return the number of bytes produced.
203         */
204        public int encode(byte[] data, int off, int length, String specials) throws IOException {
205            int endOffset = off + length;
206    
207            while (off < endOffset) {
208                // get the character
209                byte ch = data[off++];
210    
211                // handle the encoding of this character.
212                encode(ch, specials);
213            }
214    
215            return bytesWritten;
216        }
217    
218    
219        /**
220         * encode the input data producing a UUEncoded output stream.
221         *
222         * @param data   The array of byte data.
223         * @param off    The starting offset within the data.
224         * @param length Length of the data to encode.
225         *
226         * @return the number of bytes produced.
227         */
228        public int encode(PushbackInputStream in, StringBuffer out, String specials, int limit) throws IOException {
229            int count = 0;
230    
231            while (count < limit) {
232                int ch = in.read();
233    
234                if (ch == -1) {
235                    return count;
236                }
237                // make sure this is just a single byte value.
238                ch = ch &0xFF;
239    
240                // spaces require special handling.  If the next character is a line terminator, then
241                // the space needs to be encoded.
242                if (ch == ' ') {
243                    // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
244                    out.append('_');
245                    count++;
246                }
247                // non-ascii chars and the designated specials all get encoded.
248                else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
249                    // we need at least 3 characters to write this out, so we need to
250                    // forget we saw this one and try in the next segment.
251                    if (count + 3 > limit) {
252                        in.unread(ch);
253                        return count;
254                    }
255                    out.append('=');
256                    out.append((char)encodingTable[ch >> 4]);
257                    out.append((char)encodingTable[ch & 0x0F]);
258                    count += 3;
259                }
260                else {
261                    // good character, just use unchanged.
262                    out.append((char)ch);
263                    count++;
264                }
265            }
266            return count;
267        }
268    
269    
270        /**
271         * Specialized version of the decoder that handles encoding of
272         * RFC 2047 encoded word values.  This has special handling for
273         * certain characters, but less special handling for blanks and
274         * linebreaks.
275         *
276         * @param ch
277         * @param specials
278         *
279         * @exception IOException
280         */
281        public void encode(int ch, String specials) throws IOException {
282            // make sure this is just a single byte value.
283            ch = ch &0xFF;
284    
285            // spaces require special handling.  If the next character is a line terminator, then
286            // the space needs to be encoded.
287            if (ch == ' ') {
288                // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
289                writeCharacter('_');
290            }
291            // non-ascii chars and the designated specials all get encoded.
292            else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
293                writeEncodedCharacter(ch);
294            }
295            else {
296                // good character, just use unchanged.
297                writeCharacter(ch);
298            }
299        }
300    
301    
302        /**
303         * encode the input data producing a UUEncoded output stream.
304         *
305         * @param data   The array of byte data.
306         * @param off    The starting offset within the data.
307         * @param length Length of the data to encode.
308         * @param out    The output stream the encoded data is written to.
309         *
310         * @return the number of bytes produced.
311         */
312        public int encode(byte[] data, int off, int length, OutputStream out) throws IOException {
313            // make sure we're writing to the correct stream
314            this.out = out;
315            bytesWritten = 0;
316    
317            // do the actual encoding
318            return encode(data, off, length);
319        }
320    
321    
322        /**
323         * decode the uuencoded byte data writing it to the given output stream
324         *
325         * @param data   The array of byte data to decode.
326         * @param off    Starting offset within the array.
327         * @param length The length of data to encode.
328         * @param out    The output stream used to return the decoded data.
329         *
330         * @return the number of bytes produced.
331         * @exception IOException
332         */
333        public int decode(byte[] data, int off, int length, OutputStream out) throws IOException {
334            // make sure we're writing to the correct stream
335            this.out = out;
336    
337            int endOffset = off + length;
338            int bytesWritten = 0;
339    
340            while (off < endOffset) {
341                byte ch = data[off++];
342    
343                // space characters are a pain.  We need to scan ahead until we find a non-space character.
344                // if the character is a line terminator, we need to discard the blanks.
345                if (ch == ' ') {
346                    int trailingSpaces = 1;
347                    // scan forward, counting the characters.
348                    while (off < endOffset && data[off] == ' ') {
349                        // step forward and count this.
350                        off++;
351                        trailingSpaces++;
352                    }
353                    // is this a lineend at the current location?
354                    if (off >= endOffset || data[off] == '\r' || data[off] == '\n') {
355                        // go to the next one
356                        continue;
357                    }
358                    else {
359                        // make sure we account for the spaces in the output count.
360                        bytesWritten += trailingSpaces;
361                        // write out the blank characters we counted and continue with the non-blank.
362                        while (trailingSpaces-- > 0) {
363                            out.write(' ');
364                        }
365                    }
366                }
367                else if (ch == '=') {
368                    // we found an encoded character.  Reduce the 3 char sequence to one.
369                    // but first, make sure we have two characters to work with.
370                    if (off + 1 >= endOffset) {
371                        throw new IOException("Invalid quoted printable encoding");
372                    }
373                    // convert the two bytes back from hex.
374                    byte b1 = data[off++];
375                    byte b2 = data[off++];
376    
377                    // we've found an encoded carriage return.  The next char needs to be a newline
378                    if (b1 == '\r') {
379                        if (b2 != '\n') {
380                            throw new IOException("Invalid quoted printable encoding");
381                        }
382                        // this was a soft linebreak inserted by the encoding.  We just toss this away
383                        // on decode.
384                    }
385                    else {
386                        // this is a hex pair we need to convert back to a single byte.
387                        b1 = decodingTable[b1];
388                        b2 = decodingTable[b2];
389                        out.write((b1 << 4) | b2);
390                        // 3 bytes in, one byte out
391                        bytesWritten++;
392                    }
393                }
394                else {
395                    // simple character, just write it out.
396                    out.write(ch);
397                    bytesWritten++;
398                }
399            }
400    
401            return bytesWritten;
402        }
403    
404        /**
405         * Decode a byte array of data.
406         *
407         * @param data   The data array.
408         * @param out    The output stream target for the decoded data.
409         *
410         * @return The number of bytes written to the stream.
411         * @exception IOException
412         */
413        public int decodeWord(byte[] data, OutputStream out) throws IOException {
414            return decodeWord(data, 0, data.length, out);
415        }
416    
417    
418        /**
419         * decode the uuencoded byte data writing it to the given output stream
420         *
421         * @param data   The array of byte data to decode.
422         * @param off    Starting offset within the array.
423         * @param length The length of data to encode.
424         * @param out    The output stream used to return the decoded data.
425         *
426         * @return the number of bytes produced.
427         * @exception IOException
428         */
429        public int decodeWord(byte[] data, int off, int length, OutputStream out) throws IOException {
430            // make sure we're writing to the correct stream
431            this.out = out;
432    
433            int endOffset = off + length;
434            int bytesWritten = 0;
435    
436            while (off < endOffset) {
437                byte ch = data[off++];
438    
439                // space characters were translated to '_' on encode, so we need to translate them back.
440                if (ch == '_') {
441                    out.write(' ');
442                }
443                else if (ch == '=') {
444                    // we found an encoded character.  Reduce the 3 char sequence to one.
445                    // but first, make sure we have two characters to work with.
446                    if (off + 1 >= endOffset) {
447                        throw new IOException("Invalid quoted printable encoding");
448                    }
449                    // convert the two bytes back from hex.
450                    byte b1 = data[off++];
451                    byte b2 = data[off++];
452    
453                    // we've found an encoded carriage return.  The next char needs to be a newline
454                    if (b1 == '\r') {
455                        if (b2 != '\n') {
456                            throw new IOException("Invalid quoted printable encoding");
457                        }
458                        // this was a soft linebreak inserted by the encoding.  We just toss this away
459                        // on decode.
460                    }
461                    else {
462                        // this is a hex pair we need to convert back to a single byte.
463                        byte c1 = decodingTable[b1];
464                        byte c2 = decodingTable[b2];
465                        out.write((c1 << 4) | c2);
466                        // 3 bytes in, one byte out
467                        bytesWritten++;
468                    }
469                }
470                else {
471                    // simple character, just write it out.
472                    out.write(ch);
473                    bytesWritten++;
474                }
475            }
476    
477            return bytesWritten;
478        }
479    
480    
481        /**
482         * decode the UUEncoded String data writing it to the given output stream.
483         *
484         * @param data   The String data to decode.
485         * @param out    The output stream to write the decoded data to.
486         *
487         * @return the number of bytes produced.
488         * @exception IOException
489         */
490        public int decode(String data, OutputStream out) throws IOException {
491            try {
492                // just get the byte data and decode.
493                byte[] bytes = data.getBytes("US-ASCII");
494                return decode(bytes, 0, bytes.length, out);
495            } catch (UnsupportedEncodingException e) {
496                throw new IOException("Invalid UUEncoding");
497            }
498        }
499    
500        private void checkLineLength(int required) throws IOException {
501            // if we're at our line length limit, write out a soft line break and reset.
502            if ((lineCount + required) >= lineLength ) {
503                out.write('=');
504                out.write('\r');
505                out.write('\n');
506                bytesWritten += 3;
507                lineCount = 0;
508            }
509        }
510    
511    
512        public void writeEncodedCharacter(int ch) throws IOException {
513            // we need 3 characters for an encoded value
514            checkLineLength(3);
515            out.write('=');
516            out.write(encodingTable[ch >> 4]);
517            out.write(encodingTable[ch & 0x0F]);
518            lineCount += 3;
519            bytesWritten += 3;
520        }
521    
522    
523        public void writeCharacter(int ch) throws IOException {
524            // we need 3 characters for an encoded value
525            checkLineLength(1);
526            out.write(ch);
527            lineCount++;
528            bytesWritten++;
529        }
530    
531    
532        public void writeEOL() throws IOException {
533            out.write('\r');
534            out.write('\n');
535            lineCount = 0;
536            bytesWritten += 3;
537        }
538    
539    
540        public int decode(InputStream in) throws IOException {
541    
542            // we potentially need to scan over spans of whitespace characters to determine if they're real
543            // we just return blanks until the count goes to zero.
544            if (deferredWhitespace > 0) {
545                deferredWhitespace--;
546                return ' ';
547            }
548    
549            // we may have needed to scan ahead to find the first non-blank character, which we would store here.
550            // hand that back once we're done with the blanks.
551            if (cachedCharacter != -1) {
552                int result = cachedCharacter;
553                cachedCharacter = -1;
554                return result;
555            }
556    
557            int ch = in.read();
558    
559            // reflect back an EOF condition.
560            if (ch == -1) {
561                return -1;
562            }
563    
564            // space characters are a pain.  We need to scan ahead until we find a non-space character.
565            // if the character is a line terminator, we need to discard the blanks.
566            if (ch == ' ') {
567                // scan forward, counting the characters.
568                while ((ch = in.read()) == ' ') {
569                    deferredWhitespace++;
570                }
571    
572                // is this a lineend at the current location?
573                if (ch == -1 || ch == '\r' || ch == '\n') {
574                    // those blanks we so zealously counted up don't really exist.  Clear out the counter.
575                    deferredWhitespace = 0;
576                    // return the real significant character now.
577                    return ch;
578                }
579                           // remember this character for later, after we've used up the deferred blanks.
580                cachedCharacter = decodeNonspaceChar(in, ch);
581                // return this space.  We did not include this one in the deferred count, so we're right in sync.
582                return ' ';
583            }
584            return decodeNonspaceChar(in, ch);
585        }
586    
587           private int decodeNonspaceChar(InputStream in, int ch) throws IOException {
588                   if (ch == '=') {
589                int b1 = in.read();
590                // we need to get two characters after the quotation marker
591                if (b1 == -1) {
592                    throw new IOException("Truncated quoted printable data");
593                }
594                int b2 = in.read();
595                // we need to get two characters after the quotation marker
596                if (b2 == -1) {
597                    throw new IOException("Truncated quoted printable data");
598                }
599    
600                // we've found an encoded carriage return.  The next char needs to be a newline
601                if (b1 == '\r') {
602                    if (b2 != '\n') {
603                        throw new IOException("Invalid quoted printable encoding");
604                    }
605                    // this was a soft linebreak inserted by the encoding.  We just toss this away
606                    // on decode.  We need to return something, so recurse and decode the next.
607                    return decode(in);
608                }
609                else {
610                    // this is a hex pair we need to convert back to a single byte.
611                    b1 = decodingTable[b1];
612                    b2 = decodingTable[b2];
613                    return (b1 << 4) | b2;
614                }
615            }
616            else {
617                return ch;
618            }
619        }
620    
621    
622        /**
623         * Perform RFC-2047 word encoding using Q-P data encoding.
624         *
625         * @param in       The source for the encoded data.
626         * @param charset  The charset tag to be added to each encoded data section.
627         * @param specials The set of special characters that we require to encoded.
628         * @param out      The output stream where the encoded data is to be written.
629         * @param fold     Controls whether separate sections of encoded data are separated by
630         *                 linebreaks or whitespace.
631         *
632         * @exception IOException
633         */
634        public void encodeWord(InputStream in, String charset, String specials, OutputStream out, boolean fold) throws IOException
635        {
636            // we need to scan ahead in a few places, which may require pushing characters back on to the stream.
637            // make sure we have a stream where this is possible.
638            PushbackInputStream inStream = new PushbackInputStream(in);
639            PrintStream writer = new PrintStream(out);
640    
641            // segments of encoded data are limited to 75 byes, including the control sections.
642            int limit = 75 - 7 - charset.length();
643            boolean firstLine = true;
644            StringBuffer encodedString = new StringBuffer(76);
645    
646            while (true) {
647    
648                // encode another segment of data.
649                encode(inStream, encodedString, specials, limit);
650                // nothing encoded means we've hit the end of the data.
651                if (encodedString.length() == 0) {
652                    break;
653                }
654                // if we have more than one segment, we need to insert separators.  Depending on whether folding
655                // was requested, this is either a blank or a linebreak.
656                if (!firstLine) {
657                    if (fold) {
658                        writer.print("\r\n");
659                    }
660                    else {
661                        writer.print(" ");
662                    }
663                }
664    
665                // add the encoded word header
666                writer.print("=?");
667                writer.print(charset);
668                writer.print("?Q?");
669                // the data
670                writer.print(encodedString.toString());
671                // and the terminator mark
672                writer.print("?=");
673                writer.flush();
674    
675                // we reset the string buffer and reuse it.
676                encodedString.setLength(0);
677                // we need a delimiter between sections from this point on. 
678                firstLine = false;
679            }
680        }
681    
682    
683        /**
684         * Perform RFC-2047 word encoding using Base64 data encoding.
685         *
686         * @param in      The source for the encoded data.
687         * @param charset The charset tag to be added to each encoded data section.
688         * @param out     The output stream where the encoded data is to be written.
689         * @param fold    Controls whether separate sections of encoded data are separated by
690         *                linebreaks or whitespace.
691         *
692         * @exception IOException
693         */
694        public void encodeWord(byte[] data, StringBuffer out, String charset, String specials) throws IOException
695        {
696            // append the word header 
697            out.append("=?");
698            out.append(charset);
699            out.append("?Q?"); 
700            // add on the encodeded data       
701            encodeWordData(data, out, specials); 
702            // the end of the encoding marker 
703            out.append("?="); 
704        }
705    
706    
707        /**
708         * Perform RFC-2047 word encoding using Q-P data encoding.
709         *
710         * @param in       The source for the encoded data.
711         * @param charset  The charset tag to be added to each encoded data section.
712         * @param specials The set of special characters that we require to encoded.
713         * @param out      The output stream where the encoded data is to be written.
714         * @param fold     Controls whether separate sections of encoded data are separated by
715         *                 linebreaks or whitespace.
716         *
717         * @exception IOException
718         */
719        public void encodeWordData(byte[] data, StringBuffer out, String specials) throws IOException {
720            for (int i = 0; i < data.length; i++) {
721                int ch = data[i] & 0xff; ; 
722    
723                // spaces require special handling.  If the next character is a line terminator, then
724                // the space needs to be encoded.
725                if (ch == ' ') {
726                    // blanks get translated into underscores, because the encoded tokens can't have embedded blanks.
727                    out.append('_');
728                }
729                // non-ascii chars and the designated specials all get encoded.
730                else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
731                    out.append('=');
732                    out.append((char)encodingTable[ch >> 4]);
733                    out.append((char)encodingTable[ch & 0x0F]);
734                }
735                else {
736                    // good character, just use unchanged.
737                    out.append((char)ch);
738                }
739            }
740        }
741        
742        
743        /**
744         * Estimate the final encoded size of a segment of data. 
745         * This is used to ensure that the encoded blocks do 
746         * not get split across a unicode character boundary and 
747         * that the encoding will fit within the bounds of 
748         * a mail header line. 
749         * 
750         * @param data   The data we're anticipating encoding.
751         * 
752         * @return The size of the byte data in encoded form. 
753         */
754        public int estimateEncodedLength(byte[] data, String specials) 
755        {
756            int count = 0; 
757            
758            for (int i = 0; i < data.length; i++) {
759                // make sure this is just a single byte value.
760                int  ch = data[i] & 0xff;
761    
762                // non-ascii chars and the designated specials all get encoded.
763                if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) {
764                    // Q encoding translates a single char into 3 characters 
765                    count += 3; 
766                }
767                else {
768                    // non-encoded character 
769                    count++;
770                }
771            }
772            return count; 
773        }
774    }
775    
776    
777