View Javadoc

1   /**
2    *
3    * Copyright 2003-2006 The Apache Software Foundation
4    *
5    *  Licensed under the Apache License, Version 2.0 (the "License");
6    *  you may not use this file except in compliance with the License.
7    *  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   *  Unless required by applicable law or agreed to in writing, software
12   *  distributed under the License is distributed on an "AS IS" BASIS,
13   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   *  See the License for the specific language governing permissions and
15   *  limitations under the License.
16   */
17  
18  package org.apache.geronimo.mail.util;
19  
20  import java.io.BufferedInputStream;
21  import java.io.InputStream;
22  import java.io.IOException;
23  
24  
/**
 * Set of utility methods for handling common encoding-related
 * manipulations: testing text for US-ASCII content and choosing a MIME
 * Content-Transfer-Encoding ("7bit", "quoted-printable" or "base64")
 * for a piece of content.
 */
public class ASCIIUtil {

    /** Transfer encoding name for plain 7-bit text. */
    private static final String ENCODING_7BIT = "7bit";

    /** Transfer encoding name for quoted-printable. */
    private static final String ENCODING_QP = "quoted-printable";

    /** Transfer encoding name for base64. */
    private static final String ENCODING_BASE64 = "base64";

    /**
     * Maximum number of characters allowed on a single unencoded line,
     * NOT counting the terminating CRLF pair.
     */
    private static final int MAX_LINE_LENGTH = 998;

    /**
     * Test to see if this string contains only US-ASCII (i.e., 7-bit
     * ASCII) characters, as judged by {@link #isAscii(int)}.
     *
     * @param s      The test string.  Must not be null.
     *
     * @return true if every character passes {@link #isAscii(int)} (an
     *         empty string trivially does), false if the string contains
     *         any character that does not.
     */
    public static boolean isAscii(String s) {
        for (int i = 0; i < s.length(); i++) {
            if (!isAscii(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Test to see if a given character can be considered "valid" ASCII.
     * The accepted characters are the printable range 32 through 126,
     * plus the CR, LF and tab characters.  All other control characters
     * below 32, the DEL character (127) and everything above 127 are
     * rejected.
     *
     * @param ch     The test character.
     *
     * @return true if this character meets the "ascii-ness" criteria, false
     *         otherwise.
     */
    public static boolean isAscii(int ch) {
        // these control characters are explicitly considered valid.
        if (ch == '\r' || ch == '\n' || ch == '\t') {
            return true;
        }

        // anything else must fall inside the printable range.
        return ch >= 32 && ch < 127;
    }


    /**
     * Examine a stream of text and make a judgement on what encoding
     * type should be used for the text.  Ideally, we want to use 7bit
     * encoding, but we may need to use either quoted-printable or
     * base64.  The choice is made on the ratio of 7-bit characters to
     * non-7bit characters (line break characters are not counted) and
     * on whether any line is longer than 998 characters.
     *
     * The stream is read to EOF but is not closed.
     *
     * @param content     An input stream for the content we're examining.
     *
     * @return "7bit", "quoted-printable" or "base64".
     *
     * @exception IOException
     *                   If reading from the stream fails.
     */
    public static String getTextTransferEncoding(InputStream content) throws IOException {

        // for efficiency, we'll read in blocks.
        BufferedInputStream in = new BufferedInputStream(content, 4096);

        int span = 0;            // span of characters without a line break.
        boolean containsLongLines = false;
        int asciiChars = 0;
        int nonAsciiChars = 0;

        int ch;
        while ((ch = in.read()) != -1) {
            // either linebreak character resets the line length counter.  We don't
            // validate the linebreak form here, and linebreaks are not counted
            // in the character ratio.
            if (ch == '\n' || ch == '\r') {
                span = 0;
                continue;
            }

            span++;
            // the text has long lines, we can't transfer this as unencoded text.
            if (span > MAX_LINE_LENGTH) {
                containsLongLines = true;
            }

            if (isAscii(ch)) {
                asciiChars++;
            }
            else {
                nonAsciiChars++;
            }
        }

        return chooseTextEncoding(asciiChars, nonAsciiChars, containsLongLines);
    }


    /**
     * Examine a string of text and make a judgement on what encoding
     * type should be used for the text.  Ideally, we want to use 7bit
     * encoding, but we may need to use either quoted-printable or
     * base64.  The choice is made on the ratio of 7-bit characters to
     * non-7bit characters and on whether any line is longer than 998
     * characters (such text cannot be sent unencoded, so quoted-printable
     * is chosen, matching the InputStream version of this method).
     *
     * @param content     A string for the content we're examining.  Must
     *                    not be null.
     *
     * @return "7bit", "quoted-printable" or "base64".
     */
    public static String getTextTransferEncoding(String content) {

        int span = 0;            // span of characters without a line break.
        boolean containsLongLines = false;
        int asciiChars = 0;
        int nonAsciiChars = 0;

        for (int i = 0; i < content.length(); i++) {
            int ch = content.charAt(i);

            // track the line length so over-long lines are never sent as 7bit.
            if (ch == '\n' || ch == '\r') {
                span = 0;
            }
            else {
                span++;
                if (span > MAX_LINE_LENGTH) {
                    containsLongLines = true;
                }
            }

            // every character (linebreaks included) takes part in the ratio here.
            if (isAscii(ch)) {
                asciiChars++;
            }
            else {
                nonAsciiChars++;
            }
        }

        return chooseTextEncoding(asciiChars, nonAsciiChars, containsLongLines);
    }


    /**
     * Determine if the content looks like it might be valid ascii
     * text, and thus transferable as 7bit code.  In order for this
     * to be true, all characters must be valid 7-bit ASCII code AND
     * all line breaks must be properly formed (JUST '\r\n' sequences;
     * a lone '\r' or a lone '\n' forces Base64).  7-bit transfers also
     * have a line limit of 1000 bytes (998 + the CRLF), so any
     * stretch of characters longer than 998 without a line break will
     * also force Base64 encoding.
     *
     * The stream is not closed.  It is only read as far as needed to
     * reach a decision (buffered in blocks, so the underlying stream
     * may be consumed beyond the deciding character).
     *
     * @param content     An input stream for the content we're examining.
     *
     * @return "7bit" if the entire content qualifies, otherwise "base64".
     *
     * @exception IOException
     *                   If reading from the stream fails.
     */
    public static String getBinaryTransferEncoding(InputStream content) throws IOException {

        // for efficiency, we'll read in blocks.
        BufferedInputStream in = new BufferedInputStream(content, 4096);

        int previousChar = 0;
        int span = 0;            // span of characters without a line break.

        while (true) {
            int ch = in.read();
            // if we hit an EOF here, we've only found valid text so far, so we can transfer this as
            // 7-bit ascii...unless the data ends on a dangling '\r', which is a malformed linebreak.
            if (ch == -1) {
                return previousChar == '\r' ? ENCODING_BASE64 : ENCODING_7BIT;
            }

            // a '\r' must be immediately followed by '\n'.
            if (previousChar == '\r' && ch != '\n') {
                return ENCODING_BASE64;
            }

            if (ch == '\n') {
                // a newline is only valid if the previous char was the '\r'
                if (previousChar != '\r') {
                    return ENCODING_BASE64;
                }
                // hit a line end, reset our line length counter
                span = 0;
            }
            else if (ch != '\r') {
                // the '\r' is part of the line terminator, so it is not counted
                // against the line length limit.
                span++;
                // the text has long lines, we can't transfer this as unencoded text.
                if (span > MAX_LINE_LENGTH) {
                    return ENCODING_BASE64;
                }

                // non-ascii character, we have to transfer this in binary.
                if (!isAscii(ch)) {
                    return ENCODING_BASE64;
                }
            }
            previousChar = ch;
        }
    }


    /**
     * Pick the transfer encoding for text given the gathered statistics.
     *
     * @param asciiChars        Count of characters that passed isAscii().
     * @param nonAsciiChars     Count of characters that failed isAscii().
     * @param containsLongLines true if any line exceeded the unencoded limit.
     *
     * @return "7bit", "quoted-printable" or "base64".
     */
    private static String chooseTextEncoding(int asciiChars, int nonAsciiChars, boolean containsLongLines) {
        if (nonAsciiChars == 0) {
            // only valid chars.  Long lines need Q-P, which is only slightly
            // longer but handles folding; otherwise 7bit is the ideal choice.
            return containsLongLines ? ENCODING_QP : ENCODING_7BIT;
        }

        // mostly characters requiring encoding?  Base64 is our best bet,
        // otherwise Q-P encoding will use fewer bytes than the full Base64.
        return nonAsciiChars > asciiChars ? ENCODING_BASE64 : ENCODING_QP;
    }
}