|
|||||||||||||||||||
Source file | Conditionals | Statements | Methods | TOTAL | |||||||||||||||
ASCIIUtil.java | 85% | 90.3% | 100% | 88.8% |
|
1 | /** | |
2 | * | |
3 | * Copyright 2003-2006 The Apache Software Foundation | |
4 | * | |
5 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
6 | * you may not use this file except in compliance with the License. | |
7 | * You may obtain a copy of the License at | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, software | |
12 | * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | * See the License for the specific language governing permissions and | |
15 | * limitations under the License. | |
16 | */ | |
17 | ||
18 | package org.apache.geronimo.mail.util; | |
19 | ||
20 | import java.io.BufferedInputStream; | |
21 | import java.io.InputStream; | |
22 | import java.io.IOException; | |
23 | ||
24 | ||
/**
 * Set of utility methods for handling common encoding-related
 * manipulations, primarily deciding which MIME content transfer
 * encoding ("7bit", "quoted-printable" or "base64") suits a piece
 * of content.
 */
public class ASCIIUtil {

    /**
     * Maximum number of characters allowed on a single unencoded line,
     * NOT counting the terminating CRLF (i.e., 1000 bytes with the CRLF).
     */
    private static final int MAX_LINE_LENGTH = 998;

    /** Size of the read buffer used when scanning streams. */
    private static final int BUFFER_SIZE = 4096;

    private static final String ENCODING_7BIT = "7bit";
    private static final String ENCODING_QUOTED_PRINTABLE = "quoted-printable";
    private static final String ENCODING_BASE64 = "base64";

    /**
     * Test to see if this string contains only US-ASCII (i.e., 7-bit
     * ASCII) characters, as judged by {@link #isAscii(int)}.
     *
     * @param s      The test string.  Must not be null.
     *
     * @return true if every character is acceptable (trivially true for an
     *         empty string), false if it contains any character rejected
     *         by {@link #isAscii(int)}.
     */
    public static boolean isAscii(String s) {
        for (int i = 0; i < s.length(); i++) {
            if (!isAscii(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Test to see if a given character can be considered "valid" ASCII.
     * The accepted characters are the printable range 32 through 126
     * plus the CR, LF and tab characters.  Everything else is rejected:
     * the remaining control characters below 32, DEL (127), and anything
     * above 127.
     *
     * @param ch     The test character.
     *
     * @return true if this character meets the "ascii-ness" criteria, false
     *         otherwise.
     */
    public static boolean isAscii(int ch) {
        // these control characters are explicitly considered valid.
        if (ch == '\r' || ch == '\n' || ch == '\t') {
            return true;
        }

        // otherwise only the printable range is acceptable.
        return ch >= 32 && ch < 127;
    }


    /**
     * Examine a stream of text and make a judgement on what encoding
     * type should be used for the text.  Ideally, we want to use 7bit,
     * but we may need to use either quoted-printable or base64:
     * text containing only valid characters but with a line longer than
     * 998 characters gets quoted-printable; text with invalid characters
     * gets base64 when those outnumber the valid ones, quoted-printable
     * otherwise.  Line break characters (CR and LF) reset the line length
     * and are not included in either character count.
     *
     * The stream is read to EOF but is not closed.
     *
     * @param content An input stream for the content we're examining.
     *
     * @return "7bit", "quoted-printable" or "base64".
     *
     * @throws IOException if reading from the stream fails.
     */
    public static String getTextTransferEncoding(InputStream content) throws IOException {

        // for efficiency, we'll read in blocks.
        BufferedInputStream in = new BufferedInputStream(content, BUFFER_SIZE);

        int span = 0;            // span of characters without a line break.
        boolean containsLongLines = false;
        int asciiChars = 0;
        int nonAsciiChars = 0;

        int ch;
        while ((ch = in.read()) != -1) {
            // either line break character resets the line length counter.  We don't
            // really need to validate the line break form for text.
            if (ch == '\n' || ch == '\r') {
                span = 0;
                continue;
            }

            span++;
            // the text has long lines, we can't transfer this as unencoded text.
            if (span > MAX_LINE_LENGTH) {
                containsLongLines = true;
            }

            if (isAscii(ch)) {
                asciiChars++;
            }
            else {
                nonAsciiChars++;
            }
        }

        return chooseTextEncoding(asciiChars, nonAsciiChars, containsLongLines);
    }


    /**
     * Examine a string of text and make a judgement on what encoding
     * type should be used for the text.  The decision rules are the same
     * as for {@link #getTextTransferEncoding(InputStream)}, including the
     * check for lines longer than 998 characters, so that overly long
     * lines are never reported as transferable with "7bit".
     *
     * Unlike the stream version, CR and LF characters are included in
     * the count of valid characters used for the base64 versus
     * quoted-printable decision.
     *
     * @param content A string for the content we're examining.  Must not be null.
     *
     * @return "7bit", "quoted-printable" or "base64".
     */
    public static String getTextTransferEncoding(String content) {

        int span = 0;            // span of characters without a line break.
        boolean containsLongLines = false;
        int asciiChars = 0;
        int nonAsciiChars = 0;

        for (int i = 0; i < content.length(); i++) {
            int ch = content.charAt(i);

            // track the line length so long lines are not sent unencoded.
            if (ch == '\n' || ch == '\r') {
                span = 0;
            }
            else {
                span++;
                if (span > MAX_LINE_LENGTH) {
                    containsLongLines = true;
                }
            }

            if (isAscii(ch)) {
                asciiChars++;
            }
            else {
                nonAsciiChars++;
            }
        }

        return chooseTextEncoding(asciiChars, nonAsciiChars, containsLongLines);
    }


    /**
     * Determine if the content looks like it might be
     * valid ascii text, and thus transferable as 7bit code.  In
     * order for this to be true, all characters must be valid
     * 7-bit ASCII code AND all line breaks must be properly formed
     * (JUST '\r\n' sequences; a bare '\r' or a bare '\n' forces
     * Base64).  7-bit transfers also have a line limit of 1000 bytes
     * (998 + the CRLF), so any stretch of more than 998 characters
     * without a CRLF will also force Base64 encoding.
     *
     * The stream is read until a decision can be made (at most to EOF)
     * and is not closed.
     *
     * @param content An input stream for the content we're examining.
     *
     * @return "7bit" if the whole stream is valid 7-bit text, "base64" otherwise.
     *
     * @throws IOException if reading from the stream fails.
     */
    public static String getBinaryTransferEncoding(InputStream content) throws IOException {

        // for efficiency, we'll read in blocks.
        BufferedInputStream in = new BufferedInputStream(content, BUFFER_SIZE);

        int previousChar = 0;
        int span = 0;            // span of characters without a line break.

        while (true) {
            int ch = in.read();

            // a '\r' is only valid as the first half of a CRLF pair.  Anything else
            // following it (including EOF) is a malformed line break.
            if (previousChar == '\r' && ch != '\n') {
                return ENCODING_BASE64;
            }

            // if we hit an EOF here, we've only found valid text so far, so we can
            // transfer this as 7-bit ascii.
            if (ch == -1) {
                return ENCODING_7BIT;
            }

            if (ch == '\n') {
                // a newline is only valid if the previous char was the '\r'
                if (previousChar != '\r') {
                    return ENCODING_BASE64;
                }
                // hit a line end, reset our line length counter
                span = 0;
            }
            else if (ch != '\r') {
                // the '\r' is part of the line terminator and does not count against
                // the line limit; it gets validated when the next character is read.
                span++;
                // the data has long lines, we can't transfer this as unencoded text.
                if (span > MAX_LINE_LENGTH) {
                    return ENCODING_BASE64;
                }

                // non-ascii character, we have to transfer this in binary.
                if (!isAscii(ch)) {
                    return ENCODING_BASE64;
                }
            }
            previousChar = ch;
        }
    }


    /**
     * Pick the transfer encoding for text content from the scan results.
     *
     * @param asciiChars        Count of characters accepted by isAscii().
     * @param nonAsciiChars     Count of characters rejected by isAscii().
     * @param containsLongLines true if some line exceeded the line length limit.
     *
     * @return "7bit", "quoted-printable" or "base64".
     */
    private static String chooseTextEncoding(int asciiChars, int nonAsciiChars, boolean containsLongLines) {
        if (nonAsciiChars == 0) {
            // only valid chars here.  Long lines need Q-P, which will be only
            // slightly longer but handles folding the longer lines.
            return containsLongLines ? ENCODING_QUOTED_PRINTABLE : ENCODING_7BIT;
        }

        // mostly characters requiring encoding?  Base64 is our best bet.  Otherwise
        // Q-P encoding will use fewer bytes than the full Base64.
        return nonAsciiChars > asciiChars ? ENCODING_BASE64 : ENCODING_QUOTED_PRINTABLE;
    }
}
|