001    /**
002     *
003     * Copyright 2003-2004 The Apache Software Foundation
004     *
005     *  Licensed under the Apache License, Version 2.0 (the "License");
006     *  you may not use this file except in compliance with the License.
007     *  You may obtain a copy of the License at
008     *
009     *     http://www.apache.org/licenses/LICENSE-2.0
010     *
011     *  Unless required by applicable law or agreed to in writing, software
012     *  distributed under the License is distributed on an "AS IS" BASIS,
013     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     *  See the License for the specific language governing permissions and
015     *  limitations under the License.
016     */
017    
018    package org.apache.geronimo.mail.util;
019    
020    import java.io.ByteArrayOutputStream;
021    import java.io.IOException;
022    import java.io.OutputStream;
023    import java.io.UnsupportedEncodingException;
024    
025    import javax.mail.internet.MimeUtility;
026    
027    /**
028     * Encoder for RFC2231 encoded parameters
029     *
030     * RFC2231 string are encoded as
031     *
032     *    charset'language'encoded-text
033     *
034     * and
035     *
036     *    encoded-text = *(char / hexchar)
037     *
038     * where
039     *
040     *    char is any ASCII character in the range 33-126, EXCEPT
041     *    the characters "%" and " ".
042     *
043     *    hexchar is an ASCII "%" followed by two upper case
044     *    hexadecimal digits.
045     */
046    
047    public class RFC2231Encoder implements Encoder
048    {
049        protected final byte[] encodingTable =
050            {
051                (byte)'0', (byte)'1', (byte)'2', (byte)'3', (byte)'4', (byte)'5', (byte)'6', (byte)'7',
052                (byte)'8', (byte)'9', (byte)'A', (byte)'B', (byte)'C', (byte)'D', (byte)'E', (byte)'F'
053            };
054    
055        protected String DEFAULT_SPECIALS = " *'%";
056        protected String specials = DEFAULT_SPECIALS;
057    
058        /*
059         * set up the decoding table.
060         */
061        protected final byte[] decodingTable = new byte[128];
062    
063        protected void initialiseDecodingTable()
064        {
065            for (int i = 0; i < encodingTable.length; i++)
066            {
067                decodingTable[encodingTable[i]] = (byte)i;
068            }
069        }
070    
071        public RFC2231Encoder()
072        {
073            this(null);
074        }
075    
076        public RFC2231Encoder(String specials)
077        {
078            if (specials != null) {
079                this.specials = DEFAULT_SPECIALS + specials;
080            }
081            initialiseDecodingTable();
082        }
083    
084    
085        /**
086         * encode the input data producing an RFC2231 output stream.
087         *
088         * @return the number of bytes produced.
089         */
090        public int encode(byte[] data, int off, int length, OutputStream out) throws IOException {
091    
092            int bytesWritten = 0;
093            for (int i = off; i < (off + length); i++)
094            {
095                int ch = data[i] & 0xff;
096                // character tha must be encoded?  Prefix with a '%' and encode in hex.
097                if (ch <= 32 || ch >= 127 || specials.indexOf(ch) != -1) {
098                    out.write((byte)'%');
099                    out.write(encodingTable[ch >> 4]);
100                    out.write(encodingTable[ch & 0xf]);
101                    bytesWritten += 3;
102                }
103                else {
104                    // add unchanged.
105                    out.write((byte)ch);
106                    bytesWritten++;
107                }
108            }
109    
110            return bytesWritten;
111        }
112    
113    
114        /**
115         * decode the RFC2231 encoded byte data writing it to the given output stream
116         *
117         * @return the number of bytes produced.
118         */
119        public int decode(byte[] data, int off, int length, OutputStream out) throws IOException {
120            int        outLen = 0;
121            int        end = off + length;
122    
123            int i = off;
124            while (i < end)
125            {
126                byte v = data[i++];
127                // a percent is a hex character marker, need to decode a hex value.
128                if (v == '%') {
129                    byte b1 = decodingTable[data[i++]];
130                    byte b2 = decodingTable[data[i++]];
131                    out.write((b1 << 4) | b2);
132                }
133                else {
134                    // copied over unchanged.
135                    out.write(v);
136                }
137                // always just one byte added
138                outLen++;
139            }
140    
141            return outLen;
142        }
143    
144        /**
145         * decode the RFC2231 encoded String data writing it to the given output stream.
146         *
147         * @return the number of bytes produced.
148         */
149        public int decode(String data, OutputStream out) throws IOException
150        {
151            int        length = 0;
152            int        end = data.length();
153    
154            int i = 0;
155            while (i < end)
156            {
157                char v = data.charAt(i++);
158                if (v == '%') {
159                    byte b1 = decodingTable[data.charAt(i++)];
160                    byte b2 = decodingTable[data.charAt(i++)];
161    
162                    out.write((b1 << 4) | b2);
163                }
164                else {
165                    out.write((byte)v);
166                }
167                length++;
168            }
169    
170            return length;
171        }
172    
173    
174        /**
175         * Encode a string as an RFC2231 encoded parameter, using the
176         * given character set and language.
177         *
178         * @param charset  The source character set (the MIME version).
179         * @param language The encoding language.
180         * @param data     The data to encode.
181         *
182         * @return The encoded string.
183         */
184        public String encode(String charset, String language, String data) throws IOException {
185    
186            byte[] bytes = null;
187            try {
188                // the charset we're adding is the MIME-defined name.  We need the java version
189                // in order to extract the bytes.
190                bytes = data.getBytes(MimeUtility.javaCharset(charset));
191            } catch (UnsupportedEncodingException e) {
192                // we have a translation problem here.
193                return null;
194            }
195    
196            StringBuffer result = new StringBuffer();
197    
198            // append the character set, if we have it.
199            if (charset != null) {
200                result.append(charset);
201            }
202            // the field marker is required.
203            result.append("'");
204    
205            // and the same for the language.
206            if (language != null) {
207                result.append(language);
208            }
209            // the field marker is required.
210            result.append("'");
211    
212            // wrap an output stream around our buffer for the decoding
213            OutputStream out = new StringBufferOutputStream(result);
214    
215            // encode the data stream
216            encode(bytes, 0, bytes.length, out);
217    
218            // finis!
219            return result.toString();
220        }
221    
222    
223        /**
224         * Decode an RFC2231 encoded string.
225         *
226         * @param data   The data to decode.
227         *
228         * @return The decoded string.
229         * @exception IOException
230         * @exception UnsupportedEncodingException
231         */
232        public String decode(String data) throws IOException, UnsupportedEncodingException {
233            // get the end of the language field
234            int charsetEnd = data.indexOf('\'');
235            // uh oh, might not be there
236            if (charsetEnd == -1) {
237                throw new IOException("Missing charset in RFC2231 encoded value");
238            }
239    
240            String charset = data.substring(0, charsetEnd);
241    
242            // now pull out the language the same way
243            int languageEnd = data.indexOf('\'', charsetEnd + 1);
244            if (languageEnd == -1) {
245                throw new IOException("Missing language in RFC2231 encoded value");
246            }
247    
248            String language = data.substring(charsetEnd + 1, languageEnd);
249    
250            ByteArrayOutputStream out = new ByteArrayOutputStream(data.length());
251    
252            // decode the data
253            decode(data.substring(languageEnd + 1), out);
254    
255            byte[] bytes = out.toByteArray();
256            // build a new string from this using the java version of the encoded charset.
257            return new String(bytes, 0, bytes.length, MimeUtility.javaCharset(charset));
258        }
259    }