1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, 13 * software distributed under the License is distributed on an 14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 * KIND, either express or implied. See the License for the 16 * specific language governing permissions and limitations 17 * under the License. 18 */ 19 20 package org.apache.geronimo.mail.util; 21 22 import java.io.BufferedInputStream; 23 import java.io.InputStream; 24 import java.io.IOException; 25 26 /** 27 * Set of utility classes for handling common encoding-related 28 * manipulations. 29 */ 30 public class ASCIIUtil { 31 32 /** 33 * Test to see if this string contains only US-ASCII (i.e., 7-bit 34 * ASCII) charactes. 35 * 36 * @param s The test string. 37 * 38 * @return true if this is a valid 7-bit ASCII encoding, false if it 39 * contains any non-US ASCII characters. 40 */ 41 static public boolean isAscii(String s) { 42 for (int i = 0; i < s.length(); i++) { 43 if (!isAscii(s.charAt(i))) { 44 return false; 45 } 46 } 47 return true; 48 } 49 50 /** 51 * Test to see if a given character can be considered "valid" ASCII. 52 * The excluded characters are the control characters less than 53 * 32, 8-bit characters greater than 127, EXCEPT the CR, LF and 54 * tab characters ARE considered value (all less than 32). 55 * 56 * @param ch The test character. 57 * 58 * @return true if this character meets the "ascii-ness" criteria, false 59 * otherwise. 60 */ 61 static public boolean isAscii(int ch) { 62 // these are explicitly considered valid. 63 if (ch == '\r' || ch == '\n' || ch == '\t') { 64 return true; 65 } 66 67 // anything else outside the range is just plain wrong. 68 if (ch >= 127 || ch < 32) { 69 return false; 70 } 71 return true; 72 } 73 74 75 /** 76 * Examine a stream of text and make a judgement on what encoding 77 * type should be used for the text. Ideally, we want to use 7bit 78 * encoding to determine this, but we may need to use either quoted-printable 79 * or base64. The choice is made on the ratio of 7-bit characters to non-7bit. 80 * 81 * @param content An input stream for the content we're examining. 82 * 83 * @exception IOException 84 */ 85 public static String getTextTransferEncoding(InputStream content) throws IOException { 86 87 // for efficiency, we'll read in blocks. 88 BufferedInputStream in = new BufferedInputStream(content, 4096); 89 90 int span = 0; // span of characters without a line break. 91 boolean containsLongLines = false; 92 int asciiChars = 0; 93 int nonAsciiChars = 0; 94 95 while (true) { 96 int ch = in.read(); 97 // if we hit an EOF here, go decide what type we've actually found. 98 if (ch == -1) { 99 break; 100 } 101 102 // we found a linebreak. Reset the line length counters on either one. We don't 103 // really need to validate here. 104 if (ch == '\n' || ch == '\r') { 105 // hit a line end, reset our line length counter 106 span = 0; 107 } 108 else { 109 span++; 110 // the text has long lines, we can't transfer this as unencoded text. 111 if (span > 998) { 112 containsLongLines = true; 113 } 114 115 // non-ascii character, we have to transfer this in binary. 116 if (!isAscii(ch)) { 117 nonAsciiChars++; 118 } 119 else { 120 asciiChars++; 121 } 122 } 123 } 124 125 // looking good so far, only valid chars here. 126 if (nonAsciiChars == 0) { 127 // does this contain long text lines? We need to use a Q-P encoding which will 128 // be only slightly longer, but handles folding the longer lines. 129 if (containsLongLines) { 130 return "quoted-printable"; 131 } 132 else { 133 // ideal! Easiest one to handle. 134 return "7bit"; 135 } 136 } 137 else { 138 // mostly characters requiring encoding? Base64 is our best bet. 139 if (nonAsciiChars > asciiChars) { 140 return "base64"; 141 } 142 else { 143 // Q-P encoding will use fewer bytes than the full Base64. 144 return "quoted-printable"; 145 } 146 } 147 } 148 149 150 /** 151 * Examine a stream of text and make a judgement on what encoding 152 * type should be used for the text. Ideally, we want to use 7bit 153 * encoding to determine this, but we may need to use either quoted-printable 154 * or base64. The choice is made on the ratio of 7-bit characters to non-7bit. 155 * 156 * @param content A string for the content we're examining. 157 */ 158 public static String getTextTransferEncoding(String content) { 159 160 int asciiChars = 0; 161 int nonAsciiChars = 0; 162 163 for (int i = 0; i < content.length(); i++) { 164 int ch = content.charAt(i); 165 166 // non-ascii character, we have to transfer this in binary. 167 if (!isAscii(ch)) { 168 nonAsciiChars++; 169 } 170 else { 171 asciiChars++; 172 } 173 } 174 175 // looking good so far, only valid chars here. 176 if (nonAsciiChars == 0) { 177 // ideal! Easiest one to handle. 178 return "7bit"; 179 } 180 else { 181 // mostly characters requiring encoding? Base64 is our best bet. 182 if (nonAsciiChars > asciiChars) { 183 return "base64"; 184 } 185 else { 186 // Q-P encoding will use fewer bytes than the full Base64. 187 return "quoted-printable"; 188 } 189 } 190 } 191 192 193 /** 194 * Determine if the transfer encoding looks like it might be 195 * valid ascii text, and thus transferable as 7bit code. In 196 * order for this to be true, all characters must be valid 197 * 7-bit ASCII code AND all line breaks must be properly formed 198 * (JUST '\r\n' sequences). 7-bit transfers also 199 * typically have a line limit of 1000 bytes (998 + the CRLF), so any 200 * stretch of charactes longer than that will also force Base64 encoding. 201 * 202 * @param content An input stream for the content we're examining. 203 * 204 * @exception IOException 205 */ 206 public static String getBinaryTransferEncoding(InputStream content) throws IOException { 207 208 // for efficiency, we'll read in blocks. 209 BufferedInputStream in = new BufferedInputStream(content, 4096); 210 211 int previousChar = 0; 212 int span = 0; // span of characters without a line break. 213 214 while (true) { 215 int ch = in.read(); 216 // if we hit an EOF here, we've only found valid text so far, so we can transfer this as 217 // 7-bit ascii. 218 if (ch == -1) { 219 return "7bit"; 220 } 221 222 // we found a newline, this is only valid if the previous char was the '\r' 223 if (ch == '\n') { 224 // malformed linebreak? force this to base64 encoding. 225 if (previousChar != '\r') { 226 return "base64"; 227 } 228 // hit a line end, reset our line length counter 229 span = 0; 230 } 231 else { 232 span++; 233 // the text has long lines, we can't transfer this as unencoded text. 234 if (span > 998) { 235 return "base64"; 236 } 237 238 // non-ascii character, we have to transfer this in binary. 239 if (!isAscii(ch)) { 240 return "base64"; 241 } 242 } 243 previousChar = ch; 244 } 245 } 246 }