001 /**
002 *
003 * Copyright 2003-2006 The Apache Software Foundation
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.geronimo.mail.util;
019
020 import java.io.BufferedInputStream;
021 import java.io.InputStream;
022 import java.io.IOException;
023
024
/**
 * Set of utility methods for handling common encoding-related
 * manipulations, chiefly deciding which MIME Content-Transfer-Encoding
 * ("7bit", "quoted-printable" or "base64") suits a piece of content.
 */
public class ASCIIUtil {

    /** Longest run of characters allowed on one line, not counting the line terminator. */
    private static final int MAX_LINE_LENGTH = 998;

    private static final String ENCODING_7BIT = "7bit";
    private static final String ENCODING_QUOTED_PRINTABLE = "quoted-printable";
    private static final String ENCODING_BASE64 = "base64";

    /**
     * Test to see if this string contains only US-ASCII (i.e., 7-bit
     * ASCII) characters, as judged by {@link #isAscii(int)}.
     *
     * @param s The test string.
     *
     * @return true if every character is acceptable 7-bit ASCII (trivially
     *         true for an empty string), false if the string contains any
     *         character rejected by {@link #isAscii(int)}.
     */
    public static boolean isAscii(String s) {
        for (int i = 0; i < s.length(); i++) {
            if (!isAscii(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Test to see if a given character can be considered "valid" ASCII.
     * Valid characters are the range 32 through 126 inclusive, plus the
     * CR, LF and tab characters.  All other control characters below 32,
     * the value 127, and anything above 127 are rejected.
     *
     * @param ch The test character.
     *
     * @return true if this character meets the "ascii-ness" criteria, false
     *         otherwise.
     */
    public static boolean isAscii(int ch) {
        // these are explicitly considered valid, even though they are below 32.
        if (ch == '\r' || ch == '\n' || ch == '\t') {
            return true;
        }
        // anything else must fall inside the 32..126 range.
        return ch >= 32 && ch < 127;
    }

    /**
     * Examine a stream of text and make a judgement on what encoding
     * type should be used for the text.  Ideally, we want to use 7bit
     * encoding, but we may need to use either quoted-printable
     * or base64.  The choice is made on the ratio of 7-bit characters to
     * non-7bit, and on whether any line exceeds 998 characters.
     *
     * The stream is read through to end-of-file and is not closed here.
     *
     * @param content An input stream for the content we're examining.
     *
     * @return "7bit", "quoted-printable" or "base64".
     *
     * @exception IOException if reading from the stream fails.
     */
    public static String getTextTransferEncoding(InputStream content) throws IOException {
        // for efficiency, we'll read in blocks.
        BufferedInputStream in = new BufferedInputStream(content, 4096);

        int span = 0;       // span of characters without a line break.
        boolean containsLongLines = false;
        int asciiChars = 0;
        int nonAsciiChars = 0;

        int ch;
        while ((ch = in.read()) != -1) {
            // either line-break character resets the line length counter; the
            // break is not validated and is not counted in the ratio.
            if (ch == '\n' || ch == '\r') {
                span = 0;
                continue;
            }

            span++;
            // the text has long lines, we can't transfer this as unencoded text.
            if (span > MAX_LINE_LENGTH) {
                containsLongLines = true;
            }

            if (isAscii(ch)) {
                asciiChars++;
            }
            else {
                nonAsciiChars++;
            }
        }

        return selectTextEncoding(asciiChars, nonAsciiChars, containsLongLines);
    }

    /**
     * Examine a string of text and make a judgement on what encoding
     * type should be used for the text.  Ideally, we want to use 7bit
     * encoding, but we may need to use either quoted-printable
     * or base64.  The choice is made on the ratio of 7-bit characters to
     * non-7bit, and, consistently with the stream variant, on whether any
     * line exceeds 998 characters.
     *
     * @param content A string for the content we're examining.
     *
     * @return "7bit", "quoted-printable" or "base64".
     */
    public static String getTextTransferEncoding(String content) {
        int span = 0;       // span of characters without a line break.
        boolean containsLongLines = false;
        int asciiChars = 0;
        int nonAsciiChars = 0;

        for (int i = 0; i < content.length(); i++) {
            int ch = content.charAt(i);

            // track line length so over-long lines are never labelled 7bit.
            if (ch == '\n' || ch == '\r') {
                span = 0;
            }
            else if (++span > MAX_LINE_LENGTH) {
                containsLongLines = true;
            }

            // every character, line breaks included, takes part in the ratio.
            if (isAscii(ch)) {
                asciiChars++;
            }
            else {
                nonAsciiChars++;
            }
        }

        return selectTextEncoding(asciiChars, nonAsciiChars, containsLongLines);
    }

    /**
     * Determine if the content looks like it might be
     * valid ascii text, and thus transferable as 7bit code.  In
     * order for this to be true, all characters must be valid
     * 7-bit ASCII code AND all line breaks must be properly formed
     * (JUST '\r\n' sequences; a lone '\r' or a lone '\n' forces base64).
     * 7-bit transfers also have a line limit of 1000 bytes (998 + the CRLF),
     * so any stretch of more than 998 characters before a CRLF will also
     * force Base64 encoding.
     *
     * The stream is read until a decision can be made and is not closed here.
     *
     * @param content An input stream for the content we're examining.
     *
     * @return "7bit" if the whole stream qualifies, otherwise "base64".
     *
     * @exception IOException if reading from the stream fails.
     */
    public static String getBinaryTransferEncoding(InputStream content) throws IOException {
        // for efficiency, we'll read in blocks.
        BufferedInputStream in = new BufferedInputStream(content, 4096);

        int previousChar = 0;
        int span = 0;       // span of characters without a line break.

        while (true) {
            int ch = in.read();

            // a '\r' must be followed immediately by '\n'; anything else,
            // including end-of-file, is a malformed line break.
            if (previousChar == '\r' && ch != '\n') {
                return ENCODING_BASE64;
            }

            // EOF with only valid text seen so far: transfer as 7-bit ascii.
            if (ch == -1) {
                return ENCODING_7BIT;
            }

            if (ch == '\n') {
                // a newline is only valid if the previous char was the '\r'
                if (previousChar != '\r') {
                    return ENCODING_BASE64;
                }
                // hit a line end, reset our line length counter
                span = 0;
            }
            else if (ch != '\r') {
                // the '\r' belongs to the terminator and is excluded from the
                // line length, so a 998 character line plus CRLF is accepted.
                span++;
                // the text has long lines, we can't transfer this as unencoded text.
                if (span > MAX_LINE_LENGTH) {
                    return ENCODING_BASE64;
                }
                // non-ascii character, we have to transfer this in binary.
                if (!isAscii(ch)) {
                    return ENCODING_BASE64;
                }
            }
            previousChar = ch;
        }
    }

    /**
     * Pick the text transfer encoding from the gathered statistics.
     *
     * @param asciiChars        number of characters accepted by isAscii.
     * @param nonAsciiChars     number of characters rejected by isAscii.
     * @param containsLongLines true if some line exceeded the line limit.
     *
     * @return "7bit", "quoted-printable" or "base64".
     */
    private static String selectTextEncoding(int asciiChars, int nonAsciiChars, boolean containsLongLines) {
        if (nonAsciiChars == 0) {
            // only valid chars; long lines still need Q-P so they can be folded.
            return containsLongLines ? ENCODING_QUOTED_PRINTABLE : ENCODING_7BIT;
        }
        // mostly characters requiring encoding? Base64 is our best bet,
        // otherwise Q-P will use fewer bytes than the full Base64.
        return nonAsciiChars > asciiChars ? ENCODING_BASE64 : ENCODING_QUOTED_PRINTABLE;
    }
}