|
|||||||||||||||||||
| Source file | Conditionals | Statements | Methods | TOTAL | |||||||||||||||
| ASCIIUtil.java | 85% | 90.3% | 100% | 88.8% |
|
||||||||||||||
| 1 | /** | |
| 2 | * | |
| 3 | * Copyright 2003-2006 The Apache Software Foundation | |
| 4 | * | |
| 5 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
| 6 | * you may not use this file except in compliance with the License. | |
| 7 | * You may obtain a copy of the License at | |
| 8 | * | |
| 9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
| 10 | * | |
| 11 | * Unless required by applicable law or agreed to in writing, software | |
| 12 | * distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 | * See the License for the specific language governing permissions and | |
| 15 | * limitations under the License. | |
| 16 | */ | |
| 17 | ||
| 18 | package org.apache.geronimo.mail.util; | |
| 19 | ||
| 20 | import java.io.BufferedInputStream; | |
| 21 | import java.io.InputStream; | |
| 22 | import java.io.IOException; | |
| 23 | ||
| 24 | ||
/**
 * Set of utility methods for handling common encoding-related
 * manipulations.
 */
public class ASCIIUtil {

    /**
     * Longest run of characters, not counting the terminating CRLF, that a
     * line may contain and still be transferred without encoding.
     */
    private static final int MAX_LINE_LENGTH = 998;

    /**
     * Test to see if this string contains only US-ASCII (i.e., 7-bit
     * ASCII) characters, as judged by {@link #isAscii(int)}.
     *
     * @param s      The test string.
     *
     * @return true if every character passes {@link #isAscii(int)} (an
     *         empty string trivially does), false if it contains any
     *         character that fails the test.
     */
    public static boolean isAscii(String s) {
        for (int i = 0; i < s.length(); i++) {
            if (!isAscii(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Test to see if a given character can be considered "valid" ASCII.
     * The excluded characters are the control characters less than
     * 32 and everything from 127 (DEL) upwards, EXCEPT that the CR, LF and
     * tab characters ARE considered valid (all less than 32).
     *
     * @param ch     The test character.
     *
     * @return true if this character meets the "ascii-ness" criteria, false
     *         otherwise.
     */
    public static boolean isAscii(int ch) {
        // these are explicitly considered valid.
        if (ch == '\r' || ch == '\n' || ch == '\t') {
            return true;
        }

        // anything else must fall in the printable range 32..126.
        return ch >= 32 && ch < 127;
    }


    /**
     * Examine a stream of text and make a judgement on what encoding
     * type should be used for the text.  Ideally, we want to use 7bit
     * encoding, but we may need to use either quoted-printable
     * or base64.  The choice is made on the ratio of 7-bit characters to
     * non-7bit; line-break characters are not included in either count.
     *
     * The stream is read through to end-of-file and is not closed.
     *
     * @param content  An input stream for the content we're examining.
     *
     * @return "7bit" if every character is valid ASCII and no line exceeds
     *         998 characters, "base64" if the characters needing encoding
     *         outnumber those that do not, "quoted-printable" otherwise.
     *
     * @exception IOException if reading the stream fails.
     */
    public static String getTextTransferEncoding(InputStream content) throws IOException {

        // for efficiency, we'll read in blocks.
        BufferedInputStream in = new BufferedInputStream(content, 4096);

        int span = 0;            // span of characters without a line break.
        boolean containsLongLines = false;
        int asciiChars = 0;
        int nonAsciiChars = 0;

        // read until EOF, then go decide what type we've actually found.
        for (int ch = in.read(); ch != -1; ch = in.read()) {
            // either line-break character resets the line length counter.  We don't
            // really need to validate the break sequence here.
            if (ch == '\n' || ch == '\r') {
                span = 0;
                continue;
            }

            span++;
            // the text has long lines, we can't transfer this as unencoded text.
            if (span > MAX_LINE_LENGTH) {
                containsLongLines = true;
            }

            if (isAscii(ch)) {
                asciiChars++;
            }
            else {
                nonAsciiChars++;
            }
        }

        return selectTextEncoding(asciiChars, nonAsciiChars, containsLongLines);
    }


    /**
     * Examine a string of text and make a judgement on what encoding
     * type should be used for the text.  Ideally, we want to use 7bit
     * encoding, but we may need to use either quoted-printable
     * or base64.  The choice is made on the ratio of 7-bit characters to
     * non-7bit.  As with the stream version, text that is pure ASCII but
     * has a line longer than 998 characters is reported as quoted-printable.
     *
     * @param content  A string for the content we're examining.
     *
     * @return "7bit" if every character is valid ASCII and no line exceeds
     *         998 characters, "base64" if the characters needing encoding
     *         outnumber those that do not, "quoted-printable" otherwise.
     */
    public static String getTextTransferEncoding(String content) {

        int span = 0;            // span of characters without a line break.
        boolean containsLongLines = false;
        int asciiChars = 0;
        int nonAsciiChars = 0;

        for (int i = 0; i < content.length(); i++) {
            int ch = content.charAt(i);

            // track line length so over-long lines are not sent unencoded.
            if (ch == '\n' || ch == '\r') {
                span = 0;
            }
            else {
                span++;
                if (span > MAX_LINE_LENGTH) {
                    containsLongLines = true;
                }
            }

            // every character, line breaks included, takes part in the ratio.
            if (isAscii(ch)) {
                asciiChars++;
            }
            else {
                nonAsciiChars++;
            }
        }

        return selectTextEncoding(asciiChars, nonAsciiChars, containsLongLines);
    }


    /**
     * Determine if the content looks like it might be
     * valid ascii text, and thus transferable as 7bit code.  In
     * order for this to be true, all characters must be valid
     * 7-bit ASCII code AND all line breaks must be properly formed
     * (JUST '\r\n' sequences; a lone '\r' or a lone '\n' is malformed).
     * 7-bit transfers also typically have a line limit of 1000 bytes
     * (998 + the CRLF), so any stretch of characters longer than that will
     * also force Base64 encoding.
     *
     * Reading stops at the first character that forces Base64, so the
     * stream is not necessarily consumed to the end.  It is not closed.
     *
     * @param content  An input stream for the content we're examining.
     *
     * @return "7bit" if the whole stream qualifies, "base64" otherwise.
     *
     * @exception IOException if reading the stream fails.
     */
    public static String getBinaryTransferEncoding(InputStream content) throws IOException {

        // for efficiency, we'll read in blocks.
        BufferedInputStream in = new BufferedInputStream(content, 4096);

        int previousChar = 0;
        int span = 0;            // span of characters without a line break.

        while (true) {
            int ch = in.read();
            if (ch == -1) {
                // EOF: everything seen so far was valid, unless the data ends
                // on a dangling '\r' that never got its '\n'.
                return previousChar == '\r' ? "base64" : "7bit";
            }

            // a '\r' must be immediately followed by '\n'.
            if (previousChar == '\r' && ch != '\n') {
                return "base64";
            }

            if (ch == '\n') {
                // a newline is only valid if the previous char was the '\r'
                if (previousChar != '\r') {
                    return "base64";
                }
                // hit a line end, reset our line length counter
                span = 0;
            }
            else if (ch != '\r') {
                // the '\r' belongs to the line terminator, so it is deliberately
                // not counted against the 998 character limit.
                span++;
                // the text has long lines, we can't transfer this as unencoded text.
                if (span > MAX_LINE_LENGTH) {
                    return "base64";
                }

                // non-ascii character, we have to transfer this in binary.
                if (!isAscii(ch)) {
                    return "base64";
                }
            }
            previousChar = ch;
        }
    }


    /**
     * Pick the transfer encoding for text from the statistics gathered
     * while scanning it.
     */
    private static String selectTextEncoding(int asciiChars, int nonAsciiChars, boolean containsLongLines) {
        if (nonAsciiChars == 0) {
            // only valid chars here.  Q-P is only slightly longer than the raw
            // text, but handles folding any over-long lines.
            return containsLongLines ? "quoted-printable" : "7bit";
        }
        // mostly characters requiring encoding?  Base64 is our best bet,
        // otherwise Q-P will use fewer bytes than the full Base64.
        return nonAsciiChars > asciiChars ? "base64" : "quoted-printable";
    }
}
|
||||||||||