001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied. See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019
020 package org.apache.geronimo.mail.util;
021
022 import java.io.BufferedInputStream;
023 import java.io.InputStream;
024 import java.io.IOException;
025
/**
 * Set of utility methods for handling common encoding-related
 * manipulations.
 */
public class ASCIIUtil {

    /** Transfer encoding name returned when the content can be sent unencoded. */
    private static final String ENCODING_7BIT = "7bit";

    /** Transfer encoding name returned for mostly-ASCII content that needs some encoding. */
    private static final String ENCODING_QUOTED_PRINTABLE = "quoted-printable";

    /** Transfer encoding name returned for content that is mostly (or must be treated as) binary. */
    private static final String ENCODING_BASE64 = "base64";

    /**
     * Longest line, not counting the terminating CRLF, that may be
     * transferred unencoded (1000 bytes including the CRLF).
     */
    private static final int MAX_LINE_LENGTH = 998;

    /** Size of the read buffer wrapped around the streams being examined. */
    private static final int BUFFER_SIZE = 4096;

    /**
     * Test to see if this string contains only US-ASCII (i.e., 7-bit
     * ASCII) characters, as judged by {@link #isAscii(int)}.
     *
     * @param s      The test string.
     *
     * @return true if every character is acceptable 7-bit ASCII (an empty
     *         string qualifies), false if it contains any other character.
     */
    public static boolean isAscii(String s) {
        for (int i = 0; i < s.length(); i++) {
            if (!isAscii(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Test to see if a given character can be considered "valid" ASCII.
     * The excluded characters are the control characters less than
     * 32 and everything from 127 (DEL) upwards, EXCEPT that the CR, LF and
     * tab characters ARE considered valid (all less than 32).
     *
     * @param ch     The test character.
     *
     * @return true if this character meets the "ascii-ness" criteria, false
     *         otherwise.
     */
    public static boolean isAscii(int ch) {
        // these control characters are explicitly considered valid.
        if (ch == '\r' || ch == '\n' || ch == '\t') {
            return true;
        }
        // everything else must be in the printable range 32..126.
        return ch >= 32 && ch < 127;
    }

    /**
     * Examine a stream of text and make a judgement on what encoding
     * type should be used for the text.  Ideally, we want to use 7bit
     * encoding, but we may need to use either quoted-printable
     * or base64.  The choice is made on the ratio of 7-bit characters to
     * non-7bit.  Line break characters (CR and LF) only reset the line
     * length counter; they are not included in either count.
     *
     * The stream is read to its end but is not closed.
     *
     * @param content An input stream for the content we're examining.
     *
     * @return "7bit" if all characters are valid ASCII and no line is longer
     *         than 998 characters, "base64" if the non-ASCII characters
     *         outnumber the ASCII ones, and "quoted-printable" otherwise.
     *
     * @exception IOException
     *                   if reading from the stream fails.
     */
    public static String getTextTransferEncoding(InputStream content) throws IOException {
        // for efficiency, we'll read in blocks.
        BufferedInputStream in = new BufferedInputStream(content, BUFFER_SIZE);

        int span = 0;                  // characters seen since the last line break.
        boolean containsLongLines = false;
        // long counters, so that very large streams cannot overflow and skew the ratio.
        long asciiChars = 0;
        long nonAsciiChars = 0;

        int ch;
        while ((ch = in.read()) != -1) {
            // either line break character resets the line length; no validation needed here.
            if (ch == '\n' || ch == '\r') {
                span = 0;
                continue;
            }

            // once a long line has been seen the span no longer matters, so
            // stop counting (this also keeps the counter from overflowing).
            if (!containsLongLines && ++span > MAX_LINE_LENGTH) {
                containsLongLines = true;
            }

            if (isAscii(ch)) {
                asciiChars++;
            }
            else {
                nonAsciiChars++;
            }
        }

        return selectTextEncoding(asciiChars, nonAsciiChars, containsLongLines);
    }

    /**
     * Examine a string of text and make a judgement on what encoding
     * type should be used for the text.  Ideally, we want to use 7bit
     * encoding, but we may need to use either quoted-printable
     * or base64.  The choice is made on the ratio of 7-bit characters to
     * non-7bit.  As with the stream version, text containing a line longer
     * than 998 characters is never reported as 7bit.
     *
     * @param content A string for the content we're examining.
     *
     * @return "7bit" if all characters are valid ASCII and no line is longer
     *         than 998 characters, "base64" if the non-ASCII characters
     *         outnumber the ASCII ones, and "quoted-printable" otherwise.
     */
    public static String getTextTransferEncoding(String content) {
        int span = 0;                  // characters seen since the last line break.
        boolean containsLongLines = false;
        int asciiChars = 0;
        int nonAsciiChars = 0;

        for (int i = 0; i < content.length(); i++) {
            int ch = content.charAt(i);

            if (ch == '\n' || ch == '\r') {
                // hit a line end, reset our line length counter
                span = 0;
            }
            else if (++span > MAX_LINE_LENGTH) {
                // the text has long lines, we can't transfer this as unencoded text.
                containsLongLines = true;
            }

            // line breaks pass isAscii() and so count as ASCII characters here.
            if (isAscii(ch)) {
                asciiChars++;
            }
            else {
                nonAsciiChars++;
            }
        }

        return selectTextEncoding(asciiChars, nonAsciiChars, containsLongLines);
    }

    /**
     * Determine if the content looks like it might be
     * valid ascii text, and thus transferable as 7bit code.  In
     * order for this to be true, all characters must be valid
     * 7-bit ASCII code AND all line breaks must be properly formed
     * (JUST '\r\n' sequences).  7-bit transfers also
     * typically have a line limit of 1000 bytes (998 + the CRLF), so any
     * stretch of characters longer than that will also force Base64 encoding.
     *
     * Reading stops at the first byte that rules out 7bit, so the stream is
     * not necessarily consumed to its end.  The stream is not closed.
     *
     * @param content An input stream for the content we're examining.
     *
     * @return "7bit" if the whole stream qualifies (an empty stream does),
     *         "base64" otherwise.
     *
     * @exception IOException
     *                   if reading from the stream fails.
     */
    public static String getBinaryTransferEncoding(InputStream content) throws IOException {
        // for efficiency, we'll read in blocks.
        BufferedInputStream in = new BufferedInputStream(content, BUFFER_SIZE);

        int previousChar = 0;
        int span = 0;      // characters on the current line, excluding the CRLF.

        while (true) {
            int ch = in.read();
            if (ch == -1) {
                // a dangling '\r' at the very end is a malformed line break;
                // otherwise we've only found valid text and can use 7bit.
                return previousChar == '\r' ? ENCODING_BASE64 : ENCODING_7BIT;
            }

            if (ch == '\n') {
                // a newline is only valid if the previous char was the '\r'
                if (previousChar != '\r') {
                    return ENCODING_BASE64;
                }
                // hit a line end, reset our line length counter
                span = 0;
            }
            else {
                // the previous '\r' was not followed by '\n': bare carriage return.
                if (previousChar == '\r') {
                    return ENCODING_BASE64;
                }
                // a '\r' is (potentially) part of the line terminator, so it
                // neither counts toward the line length nor needs validating.
                if (ch != '\r') {
                    // non-ascii character, we have to transfer this in binary.
                    if (!isAscii(ch)) {
                        return ENCODING_BASE64;
                    }
                    // the text has long lines, we can't transfer this as unencoded text.
                    if (++span > MAX_LINE_LENGTH) {
                        return ENCODING_BASE64;
                    }
                }
            }
            previousChar = ch;
        }
    }

    /**
     * Pick the transfer encoding for text from the statistics gathered
     * while scanning it.
     *
     * @param asciiChars        number of characters counted as valid ASCII.
     * @param nonAsciiChars     number of characters counted as not valid ASCII.
     * @param containsLongLines true if some line exceeded the unencoded line limit.
     *
     * @return the name of the chosen transfer encoding.
     */
    private static String selectTextEncoding(long asciiChars, long nonAsciiChars, boolean containsLongLines) {
        if (nonAsciiChars == 0) {
            // only valid chars here.  Long lines still need Q-P, which is only
            // slightly longer but handles folding the longer lines.
            return containsLongLines ? ENCODING_QUOTED_PRINTABLE : ENCODING_7BIT;
        }
        // mostly characters requiring encoding?  Base64 is our best bet;
        // otherwise Q-P will use fewer bytes than the full Base64.
        return nonAsciiChars > asciiChars ? ENCODING_BASE64 : ENCODING_QUOTED_PRINTABLE;
    }
}