1 /**
2 *
3 * Copyright 2003-2004 The Apache Software Foundation
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.geronimo.mail.util;
19
20 import java.io.BufferedInputStream;
21 import java.io.InputStream;
22 import java.io.IOException;
23
24
25 /**
26 * Set of utility classes for handling common encoding-related
27 * manipulations.
28 */
29 public class ASCIIUtil {
30 private static final String MIME_FOLDTEXT = "mail.mime.foldtext";
31 private static final int FOLD_THRESHOLD = 76;
32
33 /**
34 * Test to see if this string contains only US-ASCII (i.e., 7-bit
35 * ASCII) charactes.
36 *
37 * @param s The test string.
38 *
39 * @return true if this is a valid 7-bit ASCII encoding, false if it
40 * contains any non-US ASCII characters.
41 */
42 static public boolean isAscii(String s) {
43 for (int i = 0; i < s.length(); i++) {
44 if (!isAscii(s.charAt(i))) {
45 return false;
46 }
47 }
48 return true;
49 }
50
51 /**
52 * Test to see if a given character can be considered "valid" ASCII.
53 * The excluded characters are the control characters less than
54 * 32, 8-bit characters greater than 127, EXCEPT the CR, LF and
55 * tab characters ARE considered value (all less than 32).
56 *
57 * @param ch The test character.
58 *
59 * @return true if this character meets the "ascii-ness" criteria, false
60 * otherwise.
61 */
62 static public boolean isAscii(int ch) {
63
64 if (ch == '\r' || ch == '\n' || ch == '\t') {
65 return true;
66 }
67
68
69 if (ch >= 127 || ch < 32) {
70 return false;
71 }
72 return true;
73 }
74
75
76 /**
77 * Examine a stream of text and make a judgement on what encoding
78 * type should be used for the text. Ideally, we want to use 7bit
79 * encoding to determine this, but we may need to use either quoted-printable
80 * or base64. The choice is made on the ratio of 7-bit characters to non-7bit.
81 *
82 * @param content An input stream for the content we're examining.
83 *
84 * @exception IOException
85 */
86 public static String getTextTransferEncoding(InputStream content) throws IOException {
87
88
89 BufferedInputStream in = new BufferedInputStream(content, 4096);
90
91 int span = 0;
92 boolean containsLongLines = false;
93 int asciiChars = 0;
94 int nonAsciiChars = 0;
95
96 while (true) {
97 int ch = in.read();
98
99 if (ch == -1) {
100 break;
101 }
102
103
104
105 if (ch == '\n' || ch == '\r') {
106
107 span = 0;
108 }
109 else {
110 span++;
111
112 if (span > 998) {
113 containsLongLines = true;
114 }
115
116
117 if (!isAscii(ch)) {
118 nonAsciiChars++;
119 }
120 else {
121 asciiChars++;
122 }
123 }
124 }
125
126
127 if (nonAsciiChars == 0) {
128
129
130 if (containsLongLines) {
131 return "quoted-printable";
132 }
133 else {
134
135 return "7bit";
136 }
137 }
138 else {
139
140 if (nonAsciiChars > asciiChars) {
141 return "base64";
142 }
143 else {
144
145 return "quoted-printable";
146 }
147 }
148 }
149
150
151 /**
152 * Examine a stream of text and make a judgement on what encoding
153 * type should be used for the text. Ideally, we want to use 7bit
154 * encoding to determine this, but we may need to use either quoted-printable
155 * or base64. The choice is made on the ratio of 7-bit characters to non-7bit.
156 *
157 * @param content A string for the content we're examining.
158 */
159 public static String getTextTransferEncoding(String content) {
160
161 int asciiChars = 0;
162 int nonAsciiChars = 0;
163
164 for (int i = 0; i < content.length(); i++) {
165 int ch = content.charAt(i);
166
167
168 if (!isAscii(ch)) {
169 nonAsciiChars++;
170 }
171 else {
172 asciiChars++;
173 }
174 }
175
176
177 if (nonAsciiChars == 0) {
178
179 return "7bit";
180 }
181 else {
182
183 if (nonAsciiChars > asciiChars) {
184 return "base64";
185 }
186 else {
187
188 return "quoted-printable";
189 }
190 }
191 }
192
193
194 /**
195 * Determine if the transfer encoding looks like it might be
196 * valid ascii text, and thus transferable as 7bit code. In
197 * order for this to be true, all characters must be valid
198 * 7-bit ASCII code AND all line breaks must be properly formed
199 * (JUST '\r\n' sequences). 7-bit transfers also
200 * typically have a line limit of 1000 bytes (998 + the CRLF), so any
201 * stretch of charactes longer than that will also force Base64 encoding.
202 *
203 * @param content An input stream for the content we're examining.
204 *
205 * @exception IOException
206 */
207 public static String getBinaryTransferEncoding(InputStream content) throws IOException {
208
209
210 BufferedInputStream in = new BufferedInputStream(content, 4096);
211
212 int previousChar = 0;
213 int span = 0;
214
215 while (true) {
216 int ch = in.read();
217
218
219 if (ch == -1) {
220 return "7bit";
221 }
222
223
224 if (ch == '\n') {
225
226 if (previousChar != '\r') {
227 return "base64";
228 }
229
230 span = 0;
231 }
232 else {
233 span++;
234
235 if (span > 998) {
236 return "base64";
237 }
238
239
240 if (!isAscii(ch)) {
241 return "base64";
242 }
243 }
244 previousChar = ch;
245 }
246 }
247
248
249 /**
250 * Perform RFC 2047 text folding on a string of text.
251 *
252 * @param used The amount of text already "used up" on this line. This is
253 * typically the length of a message header that this text
254 * get getting added to.
255 * @param s The text to fold.
256 *
257 * @return The input text, with linebreaks inserted at appropriate fold points.
258 */
259 public static String fold(int used, String s) {
260
261 if (!SessionUtil.getBooleanProperty(MIME_FOLDTEXT, true)) {
262 return s;
263 }
264
265 int end;
266
267
268
269 for (end = s.length() - 1; end >= 0; end--) {
270 int ch = s.charAt(end);
271 if (ch != ' ' && ch != '\t' ) {
272 break;
273 }
274 }
275
276
277 if (end != s.length() - 1) {
278 s = s.substring(0, end + 1);
279 }
280
281
282 if (s.length() + used <= FOLD_THRESHOLD) {
283 return s;
284 }
285
286
287
288
289 StringBuffer newString = new StringBuffer(s.length() + 8);
290
291
292
293 while (used + s.length() > FOLD_THRESHOLD) {
294 int breakPoint = -1;
295 char breakChar = 0;
296
297
298 for (int i = 0; i < s.length(); i++) {
299
300 if (used + i > FOLD_THRESHOLD) {
301
302
303 if (breakPoint != -1) {
304 break;
305 }
306 }
307 char ch = s.charAt(i);
308
309
310 if (ch == ' ' || ch == '\t') {
311
312 breakPoint = i;
313
314 breakChar = ch;
315 i++;
316 while (i < s.length()) {
317 ch = s.charAt(i);
318 if (ch != ' ' && ch != '\t') {
319 break;
320 }
321 i++;
322 }
323 }
324
325 else if (ch == '\n') {
326 newString.append('\\');
327 newString.append('\n');
328 }
329 else if (ch == '\r') {
330 newString.append('\\');
331 newString.append('\n');
332 i++;
333
334 if (i < s.length() && s.charAt(i) == '\n') {
335 newString.append('\r');
336 }
337 }
338
339 }
340
341 if (breakPoint == -1) {
342 newString.append(s);
343 return newString.toString();
344 }
345 newString.append(s.substring(0, breakPoint));
346 newString.append("\r\n");
347 newString.append(breakChar);
348
349 s = s.substring(breakPoint + 1);
350
351 used = 1;
352 }
353
354
355 newString.append(s);
356 return newString.toString();
357 }
358
359 /**
360 * Unfold a folded string. The unfolding process will remove
361 * any line breaks that are not escaped and which are also followed
362 * by whitespace characters.
363 *
364 * @param s The folded string.
365 *
366 * @return A new string with unfolding rules applied.
367 */
368 public static String unfold(String s) {
369
370 if (!SessionUtil.getBooleanProperty(MIME_FOLDTEXT, true)) {
371 return s;
372 }
373
374
375 if (s.indexOf('\n') < 0 && s.indexOf('\r') < 0) {
376 return s;
377 }
378
379
380 int length = s.length();
381
382 StringBuffer newString = new StringBuffer(length);
383
384
385 for (int i = 0; i < length; i++) {
386 int ch = s.charAt(i);
387
388
389
390 if (ch == '\\') {
391
392 if (i == length - 1) {
393 newString.append(ch);
394 }
395 else {
396 int nextChar = s.charAt(i + 1);
397
398
399 if (nextChar == '\n') {
400 newString.append('\n');
401 i++;
402 }
403 else if (nextChar == '\r') {
404
405 if (i == length - 2 || s.charAt(i + 2) != '\r') {
406 newString.append('\r');
407 i++;
408 }
409 else {
410
411 newString.append('\r');
412 newString.append('\n');
413 i += 2;
414 }
415 }
416 else {
417
418 newString.append(ch);
419 }
420 }
421 }
422
423 else if (ch == '\n' || ch == '\r') {
424
425 int lineBreak = i;
426 boolean CRLF = false;
427
428 if (ch == '\r') {
429
430 if (i < length - 1 && s.charAt(i + 1) == '\n') {
431 i++;
432
433 CRLF = true;
434 }
435 }
436
437
438 int scan = i + 1;
439
440
441
442 if (scan < length && s.charAt(scan) == ' ') {
443
444 newString.append(' ');
445
446
447 i = scan + 1;
448 while (i < length && s.charAt(i) == ' ') {
449 i++;
450 }
451
452 i--;
453 }
454 else {
455
456 if (CRLF) {
457 newString.append("\r\n");
458 }
459 else {
460 newString.append(ch);
461 }
462 }
463 }
464 else {
465
466 newString.append(ch);
467 }
468 }
469 return newString.toString();
470 }
471 }