1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20 package org.apache.geronimo.mail.util;
21
22 import java.io.BufferedInputStream;
23 import java.io.InputStream;
24 import java.io.IOException;
25
/**
 * Set of utility methods for handling common encoding-related
 * manipulations: testing characters and strings for "ASCII-ness" and
 * choosing a MIME content-transfer-encoding name ("7bit",
 * "quoted-printable" or "base64") for a piece of content.
 */
public class ASCIIUtil {

    /**
     * Maximum number of characters allowed on a single line, NOT counting
     * the terminating line break, before the content can no longer be
     * transferred unencoded.
     */
    private static final int MAX_LINE_LENGTH = 998;

    /**
     * Test to see if this string contains only US-ASCII (i.e., 7-bit
     * ASCII) characters, as judged by {@link #isAscii(int)}.
     *
     * @param s      The test string.
     *
     * @return true if every character of the string passes
     *         {@link #isAscii(int)} (an empty string therefore returns
     *         true), false if it contains any character that does not.
     */
    public static boolean isAscii(String s) {
        for (int i = 0; i < s.length(); i++) {
            if (!isAscii(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Test to see if a given character can be considered "valid" ASCII.
     * The excluded characters are the control characters less than
     * 32 and all characters of 127 (DEL) and above, EXCEPT that the CR,
     * LF and tab characters ARE considered valid (all less than 32).
     *
     * @param ch     The test character.
     *
     * @return true if this character meets the "ascii-ness" criteria, false
     *         otherwise.
     */
    public static boolean isAscii(int ch) {
        // these are explicitly considered valid.
        if (ch == '\r' || ch == '\n' || ch == '\t') {
            return true;
        }

        // anything else must fall in the printable range 32..126.
        return ch >= 32 && ch < 127;
    }

    /**
     * Examine a stream of text and make a judgement on what encoding
     * type should be used for the text.  Ideally, we want to use 7bit
     * encoding, but we may need to use either quoted-printable
     * or base64.  The choice is made on the ratio of 7-bit characters to
     * non-7bit characters, and on whether any line exceeds 998 characters.
     * The stream is read to its end but is not closed.
     *
     * @param content An input stream for the content we're examining.
     *
     * @return "7bit" if all characters are valid ASCII and no line is
     *         longer than 998 characters; "base64" if the characters
     *         needing encoding outnumber those that do not;
     *         "quoted-printable" otherwise.
     *
     * @exception IOException if reading from the stream fails.
     */
    public static String getTextTransferEncoding(InputStream content) throws IOException {

        // for efficiency, we'll read in blocks.
        BufferedInputStream in = new BufferedInputStream(content, 4096);

        int span = 0;            // span of characters without a line break.
        boolean containsLongLines = false;
        // long counters so that very large streams cannot wrap the tallies.
        long asciiChars = 0;
        long nonAsciiChars = 0;

        int ch;
        while ((ch = in.read()) != -1) {
            // we found a linebreak.  Reset the line length counter on either
            // one.  We don't really need to validate the pairing here.
            if (ch == '\n' || ch == '\r') {
                span = 0;
                continue;
            }

            span++;
            // the text has long lines, we can't transfer this as unencoded text.
            if (span > MAX_LINE_LENGTH) {
                containsLongLines = true;
            }

            // tally characters that would need encoding against those that don't.
            if (isAscii(ch)) {
                asciiChars++;
            }
            else {
                nonAsciiChars++;
            }
        }

        return selectTextEncoding(asciiChars, nonAsciiChars, containsLongLines);
    }

    /**
     * Examine a string of text and make a judgement on what encoding
     * type should be used for the text.  Ideally, we want to use 7bit
     * encoding, but we may need to use either quoted-printable
     * or base64.  The choice is made on the ratio of 7-bit characters to
     * non-7bit characters.  Unlike the stream version, line lengths are
     * not considered, and CR/LF characters are counted as ASCII characters.
     *
     * @param content A string for the content we're examining.
     *
     * @return "7bit" if all characters are valid ASCII; "base64" if the
     *         characters needing encoding outnumber those that do not;
     *         "quoted-printable" otherwise.
     */
    public static String getTextTransferEncoding(String content) {

        int asciiChars = 0;
        int nonAsciiChars = 0;

        for (int i = 0; i < content.length(); i++) {
            if (isAscii(content.charAt(i))) {
                asciiChars++;
            }
            else {
                nonAsciiChars++;
            }
        }

        return selectTextEncoding(asciiChars, nonAsciiChars, false);
    }

    /**
     * Pick the transfer encoding for text content from the character
     * tallies gathered by the getTextTransferEncoding() methods.
     *
     * @param asciiChars        count of characters needing no encoding.
     * @param nonAsciiChars     count of characters that need encoding.
     * @param containsLongLines true if a line longer than 998 characters was seen.
     *
     * @return "7bit", "quoted-printable" or "base64".
     */
    private static String selectTextEncoding(long asciiChars, long nonAsciiChars, boolean containsLongLines) {
        // looking good so far, only valid chars here.
        if (nonAsciiChars == 0) {
            // long text lines need a Q-P encoding, which will be only slightly
            // longer but handles folding the longer lines.  Otherwise 7bit is
            // the ideal, easiest one to handle.
            return containsLongLines ? "quoted-printable" : "7bit";
        }

        // mostly characters requiring encoding?  Base64 is our best bet.
        // Otherwise Q-P encoding will use fewer bytes than the full Base64.
        return nonAsciiChars > asciiChars ? "base64" : "quoted-printable";
    }

    /**
     * Determine if the content looks like it might be
     * valid ascii text, and thus transferable as 7bit code.  In
     * order for this to be true, all characters must be valid
     * 7-bit ASCII code AND all line breaks must be properly formed
     * (JUST '\r\n' sequences: a '\n' without a preceding '\r', or a '\r'
     * not immediately followed by '\n', forces Base64).  7-bit transfers also
     * typically have a line limit of 1000 bytes (998 + the CRLF), so any
     * stretch of characters longer than that will also force Base64 encoding.
     * The stream is read only as far as needed to decide and is not closed.
     *
     * @param content An input stream for the content we're examining.
     *
     * @return "7bit" if the whole stream satisfies the rules above
     *         (including an empty stream), "base64" otherwise.
     *
     * @exception IOException if reading from the stream fails.
     */
    public static String getBinaryTransferEncoding(InputStream content) throws IOException {

        // for efficiency, we'll read in blocks.
        BufferedInputStream in = new BufferedInputStream(content, 4096);

        int previousChar = 0;
        int span = 0;            // span of characters without a line break.

        while (true) {
            int ch = in.read();
            // if we hit an EOF here, we've only found valid text so far, so we
            // can transfer this as 7-bit ascii -- unless the data ended on a
            // '\r' that never received its '\n'.
            if (ch == -1) {
                return previousChar == '\r' ? "base64" : "7bit";
            }

            // we found a newline, this is only valid if the previous char was the '\r'
            if (ch == '\n') {
                // malformed linebreak?  force this to base64 encoding.
                if (previousChar != '\r') {
                    return "base64";
                }
                // hit a line end, reset our line length counter
                span = 0;
            }
            else {
                // the previous '\r' was not followed by '\n': a bare CR is a
                // malformed linebreak too.
                if (previousChar == '\r') {
                    return "base64";
                }

                // a '\r' is (so far) the start of the line terminator, so it
                // does not count toward the 998 character line limit.
                if (ch != '\r') {
                    span++;
                    // the text has long lines, we can't transfer this as unencoded text.
                    if (span > MAX_LINE_LENGTH) {
                        return "base64";
                    }

                    // non-ascii character, we have to transfer this in binary.
                    if (!isAscii(ch)) {
                        return "base64";
                    }
                }
            }
            previousChar = ch;
        }
    }
}