|
|||||||||||||||||||
Source file | Conditionals | Statements | Methods | TOTAL | |||||||||||||||
ASCIIUtil.java | 32% | 38.8% | 100% | 37.8% |
|
1 | /** | |
2 | * | |
3 | * Copyright 2003-2004 The Apache Software Foundation | |
4 | * | |
5 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
6 | * you may not use this file except in compliance with the License. | |
7 | * You may obtain a copy of the License at | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, software | |
12 | * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | * See the License for the specific language governing permissions and | |
15 | * limitations under the License. | |
16 | */ | |
17 | ||
18 | package org.apache.geronimo.mail.util; | |
19 | ||
20 | import java.io.BufferedInputStream; | |
21 | import java.io.InputStream; | |
22 | import java.io.IOException; | |
23 | ||
24 | ||
25 | /** | |
26 | * Set of utility classes for handling common encoding-related | |
27 | * manipulations. | |
28 | */ | |
29 | public class ASCIIUtil { | |
30 | private static final String MIME_FOLDTEXT = "mail.mime.foldtext"; | |
31 | private static final int FOLD_THRESHOLD = 76; | |
32 | ||
33 | /** | |
34 | * Test to see if this string contains only US-ASCII (i.e., 7-bit | |
35 | * ASCII) charactes. | |
36 | * | |
37 | * @param s The test string. | |
38 | * | |
39 | * @return true if this is a valid 7-bit ASCII encoding, false if it | |
40 | * contains any non-US ASCII characters. | |
41 | */ | |
42 | 1 | static public boolean isAscii(String s) { |
43 | 1 | for (int i = 0; i < s.length(); i++) { |
44 | 3 | if (!isAscii(s.charAt(i))) { |
45 | 0 | return false; |
46 | } | |
47 | } | |
48 | 1 | return true; |
49 | } | |
50 | ||
51 | /** | |
52 | * Test to see if a given character can be considered "valid" ASCII. | |
53 | * The excluded characters are the control characters less than | |
54 | * 32, 8-bit characters greater than 127, EXCEPT the CR, LF and | |
55 | * tab characters ARE considered value (all less than 32). | |
56 | * | |
57 | * @param ch The test character. | |
58 | * | |
59 | * @return true if this character meets the "ascii-ness" criteria, false | |
60 | * otherwise. | |
61 | */ | |
62 | 614 | static public boolean isAscii(int ch) { |
63 | // these are explicitly considered valid. | |
64 | 614 | if (ch == '\r' || ch == '\n' || ch == '\t') { |
65 | 2 | return true; |
66 | } | |
67 | ||
68 | // anything else outside the range is just plain wrong. | |
69 | 612 | if (ch >= 127 || ch < 32) { |
70 | 30 | return false; |
71 | } | |
72 | 582 | return true; |
73 | } | |
74 | ||
75 | ||
76 | /** | |
77 | * Examine a stream of text and make a judgement on what encoding | |
78 | * type should be used for the text. Ideally, we want to use 7bit | |
79 | * encoding to determine this, but we may need to use either quoted-printable | |
80 | * or base64. The choice is made on the ratio of 7-bit characters to non-7bit. | |
81 | * | |
82 | * @param content An input stream for the content we're examining. | |
83 | * | |
84 | * @exception IOException | |
85 | */ | |
86 | 3 | public static String getTextTransferEncoding(InputStream content) throws IOException { |
87 | ||
88 | // for efficiency, we'll read in blocks. | |
89 | 3 | BufferedInputStream in = new BufferedInputStream(content, 4096); |
90 | ||
91 | 3 | int span = 0; // span of characters without a line break. |
92 | 3 | boolean containsLongLines = false; |
93 | 3 | int asciiChars = 0; |
94 | 3 | int nonAsciiChars = 0; |
95 | ||
96 | 3 | while (true) { |
97 | 33 | int ch = in.read(); |
98 | // if we hit an EOF here, go decide what type we've actually found. | |
99 | 33 | if (ch == -1) { |
100 | 3 | break; |
101 | } | |
102 | ||
103 | // we found a linebreak. Reset the line length counters on either one. We don't | |
104 | // really need to validate here. | |
105 | 30 | if (ch == '\n' || ch == '\r') { |
106 | // hit a line end, reset our line length counter | |
107 | 0 | span = 0; |
108 | } | |
109 | else { | |
110 | 30 | span++; |
111 | // the text has long lines, we can't transfer this as unencoded text. | |
112 | 30 | if (span > 998) { |
113 | 0 | containsLongLines = true; |
114 | } | |
115 | ||
116 | // non-ascii character, we have to transfer this in binary. | |
117 | 30 | if (!isAscii(ch)) { |
118 | 0 | nonAsciiChars++; |
119 | } | |
120 | else { | |
121 | 30 | asciiChars++; |
122 | } | |
123 | } | |
124 | } | |
125 | ||
126 | // looking good so far, only valid chars here. | |
127 | 3 | if (nonAsciiChars == 0) { |
128 | // does this contain long text lines? We need to use a Q-P encoding which will | |
129 | // be only slightly longer, but handles folding the longer lines. | |
130 | 3 | if (containsLongLines) { |
131 | 0 | return "quoted-printable"; |
132 | } | |
133 | else { | |
134 | // ideal! Easiest one to handle. | |
135 | 3 | return "7bit"; |
136 | } | |
137 | } | |
138 | else { | |
139 | // mostly characters requiring encoding? Base64 is our best bet. | |
140 | 0 | if (nonAsciiChars > asciiChars) { |
141 | 0 | return "base64"; |
142 | } | |
143 | else { | |
144 | // Q-P encoding will use fewer bytes than the full Base64. | |
145 | 0 | return "quoted-printable"; |
146 | } | |
147 | } | |
148 | } | |
149 | ||
150 | ||
151 | /** | |
152 | * Examine a stream of text and make a judgement on what encoding | |
153 | * type should be used for the text. Ideally, we want to use 7bit | |
154 | * encoding to determine this, but we may need to use either quoted-printable | |
155 | * or base64. The choice is made on the ratio of 7-bit characters to non-7bit. | |
156 | * | |
157 | * @param content A string for the content we're examining. | |
158 | */ | |
159 | 80 | public static String getTextTransferEncoding(String content) { |
160 | ||
161 | 80 | int asciiChars = 0; |
162 | 80 | int nonAsciiChars = 0; |
163 | ||
164 | 80 | for (int i = 0; i < content.length(); i++) { |
165 | 580 | int ch = content.charAt(i); |
166 | ||
167 | // non-ascii character, we have to transfer this in binary. | |
168 | 580 | if (!isAscii(ch)) { |
169 | 29 | nonAsciiChars++; |
170 | } | |
171 | else { | |
172 | 551 | asciiChars++; |
173 | } | |
174 | } | |
175 | ||
176 | // looking good so far, only valid chars here. | |
177 | 80 | if (nonAsciiChars == 0) { |
178 | // ideal! Easiest one to handle. | |
179 | 69 | return "7bit"; |
180 | } | |
181 | else { | |
182 | // mostly characters requiring encoding? Base64 is our best bet. | |
183 | 11 | if (nonAsciiChars > asciiChars) { |
184 | 0 | return "base64"; |
185 | } | |
186 | else { | |
187 | // Q-P encoding will use fewer bytes than the full Base64. | |
188 | 11 | return "quoted-printable"; |
189 | } | |
190 | } | |
191 | } | |
192 | ||
193 | ||
194 | /** | |
195 | * Determine if the transfer encoding looks like it might be | |
196 | * valid ascii text, and thus transferable as 7bit code. In | |
197 | * order for this to be true, all characters must be valid | |
198 | * 7-bit ASCII code AND all line breaks must be properly formed | |
199 | * (JUST '\r\n' sequences). 7-bit transfers also | |
200 | * typically have a line limit of 1000 bytes (998 + the CRLF), so any | |
201 | * stretch of charactes longer than that will also force Base64 encoding. | |
202 | * | |
203 | * @param content An input stream for the content we're examining. | |
204 | * | |
205 | * @exception IOException | |
206 | */ | |
207 | 1 | public static String getBinaryTransferEncoding(InputStream content) throws IOException { |
208 | ||
209 | // for efficiency, we'll read in blocks. | |
210 | 1 | BufferedInputStream in = new BufferedInputStream(content, 4096); |
211 | ||
212 | 1 | int previousChar = 0; |
213 | 1 | int span = 0; // span of characters without a line break. |
214 | ||
215 | 1 | while (true) { |
216 | 1 | int ch = in.read(); |
217 | // if we hit an EOF here, we've only found valid text so far, so we can transfer this as | |
218 | // 7-bit ascii. | |
219 | 1 | if (ch == -1) { |
220 | 0 | return "7bit"; |
221 | } | |
222 | ||
223 | // we found a newline, this is only valid if the previous char was the '\r' | |
224 | 1 | if (ch == '\n') { |
225 | // malformed linebreak? force this to base64 encoding. | |
226 | 0 | if (previousChar != '\r') { |
227 | 0 | return "base64"; |
228 | } | |
229 | // hit a line end, reset our line length counter | |
230 | 0 | span = 0; |
231 | } | |
232 | else { | |
233 | 1 | span++; |
234 | // the text has long lines, we can't transfer this as unencoded text. | |
235 | 1 | if (span > 998) { |
236 | 0 | return "base64"; |
237 | } | |
238 | ||
239 | // non-ascii character, we have to transfer this in binary. | |
240 | 1 | if (!isAscii(ch)) { |
241 | 1 | return "base64"; |
242 | } | |
243 | } | |
244 | 0 | previousChar = ch; |
245 | } | |
246 | } | |
247 | ||
248 | ||
249 | /** | |
250 | * Perform RFC 2047 text folding on a string of text. | |
251 | * | |
252 | * @param used The amount of text already "used up" on this line. This is | |
253 | * typically the length of a message header that this text | |
254 | * get getting added to. | |
255 | * @param s The text to fold. | |
256 | * | |
257 | * @return The input text, with linebreaks inserted at appropriate fold points. | |
258 | */ | |
259 | 8 | public static String fold(int used, String s) { |
260 | // if folding is disable, unfolding is also. Return the string unchanged. | |
261 | 8 | if (!SessionUtil.getBooleanProperty(MIME_FOLDTEXT, true)) { |
262 | 0 | return s; |
263 | } | |
264 | ||
265 | 8 | int end; |
266 | ||
267 | // now we need to strip off any trailing "whitespace", where whitespace is blanks, tabs, | |
268 | // and line break characters. | |
269 | 8 | for (end = s.length() - 1; end >= 0; end--) { |
270 | 8 | int ch = s.charAt(end); |
271 | 8 | if (ch != ' ' && ch != '\t' ) { |
272 | 8 | break; |
273 | } | |
274 | } | |
275 | ||
276 | // did we actually find something to remove? Shorten the String to the trimmed length | |
277 | 8 | if (end != s.length() - 1) { |
278 | 0 | s = s.substring(0, end + 1); |
279 | } | |
280 | ||
281 | // does the string as it exists now not require folding? We can just had that back right off. | |
282 | 8 | if (s.length() + used <= FOLD_THRESHOLD) { |
283 | 8 | return s; |
284 | } | |
285 | ||
286 | // get a buffer for the length of the string, plus room for a few line breaks. | |
287 | // these are soft line breaks, so we generally need more that just the line breaks (an escape + | |
288 | // CR + LF + leading space on next line); | |
289 | 0 | StringBuffer newString = new StringBuffer(s.length() + 8); |
290 | ||
291 | ||
292 | // now keep chopping this down until we've accomplished what we need. | |
293 | 0 | while (used + s.length() > FOLD_THRESHOLD) { |
294 | 0 | int breakPoint = -1; |
295 | 0 | char breakChar = 0; |
296 | ||
297 | // now scan for the next place where we can break. | |
298 | 0 | for (int i = 0; i < s.length(); i++) { |
299 | // have we passed the fold limit? | |
300 | 0 | if (used + i > FOLD_THRESHOLD) { |
301 | // if we've already seen a blank, then stop now. Otherwise | |
302 | // we keep going until we hit a fold point. | |
303 | 0 | if (breakPoint != -1) { |
304 | 0 | break; |
305 | } | |
306 | } | |
307 | 0 | char ch = s.charAt(i); |
308 | ||
309 | // a white space character? | |
310 | 0 | if (ch == ' ' || ch == '\t') { |
311 | // this might be a run of white space, so skip over those now. | |
312 | 0 | breakPoint = i; |
313 | // we need to maintain the same character type after the inserted linebreak. | |
314 | 0 | breakChar = ch; |
315 | 0 | i++; |
316 | 0 | while (i < s.length()) { |
317 | 0 | ch = s.charAt(i); |
318 | 0 | if (ch != ' ' && ch != '\t') { |
319 | 0 | break; |
320 | } | |
321 | 0 | i++; |
322 | } | |
323 | } | |
324 | // found an embedded new line. Escape this so that the unfolding process preserves it. | |
325 | 0 | else if (ch == '\n') { |
326 | 0 | newString.append('\\'); |
327 | 0 | newString.append('\n'); |
328 | } | |
329 | 0 | else if (ch == '\r') { |
330 | 0 | newString.append('\\'); |
331 | 0 | newString.append('\n'); |
332 | 0 | i++; |
333 | // if this is a CRLF pair, add the second char also | |
334 | 0 | if (i < s.length() && s.charAt(i) == '\n') { |
335 | 0 | newString.append('\r'); |
336 | } | |
337 | } | |
338 | ||
339 | } | |
340 | // no fold point found, we punt, append the remainder and leave. | |
341 | 0 | if (breakPoint == -1) { |
342 | 0 | newString.append(s); |
343 | 0 | return newString.toString(); |
344 | } | |
345 | 0 | newString.append(s.substring(0, breakPoint)); |
346 | 0 | newString.append("\r\n"); |
347 | 0 | newString.append(breakChar); |
348 | // chop the string | |
349 | 0 | s = s.substring(breakPoint + 1); |
350 | // start again, and we've used the first char of the limit already with the whitespace char. | |
351 | 0 | used = 1; |
352 | } | |
353 | ||
354 | // add on the remainder, and return | |
355 | 0 | newString.append(s); |
356 | 0 | return newString.toString(); |
357 | } | |
358 | ||
359 | /** | |
360 | * Unfold a folded string. The unfolding process will remove | |
361 | * any line breaks that are not escaped and which are also followed | |
362 | * by whitespace characters. | |
363 | * | |
364 | * @param s The folded string. | |
365 | * | |
366 | * @return A new string with unfolding rules applied. | |
367 | */ | |
368 | 6 | public static String unfold(String s) { |
369 | // if folding is disable, unfolding is also. Return the string unchanged. | |
370 | 6 | if (!SessionUtil.getBooleanProperty(MIME_FOLDTEXT, true)) { |
371 | 0 | return s; |
372 | } | |
373 | ||
374 | // if there are no line break characters in the string, we can just return this. | |
375 | 6 | if (s.indexOf('\n') < 0 && s.indexOf('\r') < 0) { |
376 | 6 | return s; |
377 | } | |
378 | ||
379 | // we need to scan and fix things up. | |
380 | 0 | int length = s.length(); |
381 | ||
382 | 0 | StringBuffer newString = new StringBuffer(length); |
383 | ||
384 | // scan the entire string | |
385 | 0 | for (int i = 0; i < length; i++) { |
386 | 0 | int ch = s.charAt(i); |
387 | ||
388 | // we have a backslash. In folded strings, escape characters are only processed as such if | |
389 | // they preceed line breaks. Otherwise, we leave it be. | |
390 | 0 | if (ch == '\\') { |
391 | // escape at the very end? Just add the character. | |
392 | 0 | if (i == length - 1) { |
393 | 0 | newString.append(ch); |
394 | } | |
395 | else { | |
396 | 0 | int nextChar = s.charAt(i + 1); |
397 | ||
398 | // naked newline? Add the new line to the buffer, and skip the escape char. | |
399 | 0 | if (nextChar == '\n') { |
400 | 0 | newString.append('\n'); |
401 | 0 | i++; |
402 | } | |
403 | 0 | else if (nextChar == '\r') { |
404 | // just the CR left? Add it, removing the escape. | |
405 | 0 | if (i == length - 2 || s.charAt(i + 2) != '\r') { |
406 | 0 | newString.append('\r'); |
407 | 0 | i++; |
408 | } | |
409 | else { | |
410 | // toss the escape, add both parts of the CRLF, and skip over two chars. | |
411 | 0 | newString.append('\r'); |
412 | 0 | newString.append('\n'); |
413 | 0 | i += 2; |
414 | } | |
415 | } | |
416 | else { | |
417 | // an escape for another purpose, just copy it over. | |
418 | 0 | newString.append(ch); |
419 | } | |
420 | } | |
421 | } | |
422 | // we have an unescaped line break | |
423 | 0 | else if (ch == '\n' || ch == '\r') { |
424 | // remember the position in case we need to backtrack. | |
425 | 0 | int lineBreak = i; |
426 | 0 | boolean CRLF = false; |
427 | ||
428 | 0 | if (ch == '\r') { |
429 | // check to see if we need to step over this. | |
430 | 0 | if (i < length - 1 && s.charAt(i + 1) == '\n') { |
431 | 0 | i++; |
432 | // flag the type so we know what we might need to preserve. | |
433 | 0 | CRLF = true; |
434 | } | |
435 | } | |
436 | ||
437 | // get a temp position scanner. | |
438 | 0 | int scan = i + 1; |
439 | ||
440 | // does a blank follow this new line? we need to scrap the new line and reduce the leading blanks | |
441 | // down to a single blank. | |
442 | 0 | if (scan < length && s.charAt(scan) == ' ') { |
443 | // add the character | |
444 | 0 | newString.append(' '); |
445 | ||
446 | // scan over the rest of the blanks | |
447 | 0 | i = scan + 1; |
448 | 0 | while (i < length && s.charAt(i) == ' ') { |
449 | 0 | i++; |
450 | } | |
451 | // we'll increment down below, so back up to the last blank as the current char. | |
452 | 0 | i--; |
453 | } | |
454 | else { | |
455 | // we must keep this line break. Append the appropriate style. | |
456 | 0 | if (CRLF) { |
457 | 0 | newString.append("\r\n"); |
458 | } | |
459 | else { | |
460 | 0 | newString.append(ch); |
461 | } | |
462 | } | |
463 | } | |
464 | else { | |
465 | // just a normal, ordinary character | |
466 | 0 | newString.append(ch); |
467 | } | |
468 | } | |
469 | 0 | return newString.toString(); |
470 | } | |
471 | } |
|