001 /** 002 * 003 * Copyright 2003-2006 The Apache Software Foundation 004 * 005 * Licensed under the Apache License, Version 2.0 (the "License"); 006 * you may not use this file except in compliance with the License. 007 * You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package org.apache.geronimo.mail.util; 019 020 import java.io.BufferedInputStream; 021 import java.io.InputStream; 022 import java.io.IOException; 023 024 025 /** 026 * Set of utility classes for handling common encoding-related 027 * manipulations. 028 */ 029 public class ASCIIUtil { 030 031 /** 032 * Test to see if this string contains only US-ASCII (i.e., 7-bit 033 * ASCII) charactes. 034 * 035 * @param s The test string. 036 * 037 * @return true if this is a valid 7-bit ASCII encoding, false if it 038 * contains any non-US ASCII characters. 039 */ 040 static public boolean isAscii(String s) { 041 for (int i = 0; i < s.length(); i++) { 042 if (!isAscii(s.charAt(i))) { 043 return false; 044 } 045 } 046 return true; 047 } 048 049 /** 050 * Test to see if a given character can be considered "valid" ASCII. 051 * The excluded characters are the control characters less than 052 * 32, 8-bit characters greater than 127, EXCEPT the CR, LF and 053 * tab characters ARE considered value (all less than 32). 054 * 055 * @param ch The test character. 056 * 057 * @return true if this character meets the "ascii-ness" criteria, false 058 * otherwise. 059 */ 060 static public boolean isAscii(int ch) { 061 // these are explicitly considered valid. 062 if (ch == '\r' || ch == '\n' || ch == '\t') { 063 return true; 064 } 065 066 // anything else outside the range is just plain wrong. 067 if (ch >= 127 || ch < 32) { 068 return false; 069 } 070 return true; 071 } 072 073 074 /** 075 * Examine a stream of text and make a judgement on what encoding 076 * type should be used for the text. Ideally, we want to use 7bit 077 * encoding to determine this, but we may need to use either quoted-printable 078 * or base64. The choice is made on the ratio of 7-bit characters to non-7bit. 079 * 080 * @param content An input stream for the content we're examining. 081 * 082 * @exception IOException 083 */ 084 public static String getTextTransferEncoding(InputStream content) throws IOException { 085 086 // for efficiency, we'll read in blocks. 087 BufferedInputStream in = new BufferedInputStream(content, 4096); 088 089 int span = 0; // span of characters without a line break. 090 boolean containsLongLines = false; 091 int asciiChars = 0; 092 int nonAsciiChars = 0; 093 094 while (true) { 095 int ch = in.read(); 096 // if we hit an EOF here, go decide what type we've actually found. 097 if (ch == -1) { 098 break; 099 } 100 101 // we found a linebreak. Reset the line length counters on either one. We don't 102 // really need to validate here. 103 if (ch == '\n' || ch == '\r') { 104 // hit a line end, reset our line length counter 105 span = 0; 106 } 107 else { 108 span++; 109 // the text has long lines, we can't transfer this as unencoded text. 110 if (span > 998) { 111 containsLongLines = true; 112 } 113 114 // non-ascii character, we have to transfer this in binary. 115 if (!isAscii(ch)) { 116 nonAsciiChars++; 117 } 118 else { 119 asciiChars++; 120 } 121 } 122 } 123 124 // looking good so far, only valid chars here. 125 if (nonAsciiChars == 0) { 126 // does this contain long text lines? We need to use a Q-P encoding which will 127 // be only slightly longer, but handles folding the longer lines. 128 if (containsLongLines) { 129 return "quoted-printable"; 130 } 131 else { 132 // ideal! Easiest one to handle. 133 return "7bit"; 134 } 135 } 136 else { 137 // mostly characters requiring encoding? Base64 is our best bet. 138 if (nonAsciiChars > asciiChars) { 139 return "base64"; 140 } 141 else { 142 // Q-P encoding will use fewer bytes than the full Base64. 143 return "quoted-printable"; 144 } 145 } 146 } 147 148 149 /** 150 * Examine a stream of text and make a judgement on what encoding 151 * type should be used for the text. Ideally, we want to use 7bit 152 * encoding to determine this, but we may need to use either quoted-printable 153 * or base64. The choice is made on the ratio of 7-bit characters to non-7bit. 154 * 155 * @param content A string for the content we're examining. 156 */ 157 public static String getTextTransferEncoding(String content) { 158 159 int asciiChars = 0; 160 int nonAsciiChars = 0; 161 162 for (int i = 0; i < content.length(); i++) { 163 int ch = content.charAt(i); 164 165 // non-ascii character, we have to transfer this in binary. 166 if (!isAscii(ch)) { 167 nonAsciiChars++; 168 } 169 else { 170 asciiChars++; 171 } 172 } 173 174 // looking good so far, only valid chars here. 175 if (nonAsciiChars == 0) { 176 // ideal! Easiest one to handle. 177 return "7bit"; 178 } 179 else { 180 // mostly characters requiring encoding? Base64 is our best bet. 181 if (nonAsciiChars > asciiChars) { 182 return "base64"; 183 } 184 else { 185 // Q-P encoding will use fewer bytes than the full Base64. 186 return "quoted-printable"; 187 } 188 } 189 } 190 191 192 /** 193 * Determine if the transfer encoding looks like it might be 194 * valid ascii text, and thus transferable as 7bit code. In 195 * order for this to be true, all characters must be valid 196 * 7-bit ASCII code AND all line breaks must be properly formed 197 * (JUST '\r\n' sequences). 7-bit transfers also 198 * typically have a line limit of 1000 bytes (998 + the CRLF), so any 199 * stretch of charactes longer than that will also force Base64 encoding. 200 * 201 * @param content An input stream for the content we're examining. 202 * 203 * @exception IOException 204 */ 205 public static String getBinaryTransferEncoding(InputStream content) throws IOException { 206 207 // for efficiency, we'll read in blocks. 208 BufferedInputStream in = new BufferedInputStream(content, 4096); 209 210 int previousChar = 0; 211 int span = 0; // span of characters without a line break. 212 213 while (true) { 214 int ch = in.read(); 215 // if we hit an EOF here, we've only found valid text so far, so we can transfer this as 216 // 7-bit ascii. 217 if (ch == -1) { 218 return "7bit"; 219 } 220 221 // we found a newline, this is only valid if the previous char was the '\r' 222 if (ch == '\n') { 223 // malformed linebreak? force this to base64 encoding. 224 if (previousChar != '\r') { 225 return "base64"; 226 } 227 // hit a line end, reset our line length counter 228 span = 0; 229 } 230 else { 231 span++; 232 // the text has long lines, we can't transfer this as unencoded text. 233 if (span > 998) { 234 return "base64"; 235 } 236 237 // non-ascii character, we have to transfer this in binary. 238 if (!isAscii(ch)) { 239 return "base64"; 240 } 241 } 242 previousChar = ch; 243 } 244 } 245 }