001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019 020 package org.apache.geronimo.mail.util; 021 022 import java.io.BufferedInputStream; 023 import java.io.InputStream; 024 import java.io.IOException; 025 026 /** 027 * Set of utility classes for handling common encoding-related 028 * manipulations. 029 */ 030 public class ASCIIUtil { 031 032 /** 033 * Test to see if this string contains only US-ASCII (i.e., 7-bit 034 * ASCII) charactes. 035 * 036 * @param s The test string. 037 * 038 * @return true if this is a valid 7-bit ASCII encoding, false if it 039 * contains any non-US ASCII characters. 040 */ 041 static public boolean isAscii(String s) { 042 for (int i = 0; i < s.length(); i++) { 043 if (!isAscii(s.charAt(i))) { 044 return false; 045 } 046 } 047 return true; 048 } 049 050 /** 051 * Test to see if a given character can be considered "valid" ASCII. 052 * The excluded characters are the control characters less than 053 * 32, 8-bit characters greater than 127, EXCEPT the CR, LF and 054 * tab characters ARE considered value (all less than 32). 055 * 056 * @param ch The test character. 057 * 058 * @return true if this character meets the "ascii-ness" criteria, false 059 * otherwise. 060 */ 061 static public boolean isAscii(int ch) { 062 // these are explicitly considered valid. 063 if (ch == '\r' || ch == '\n' || ch == '\t') { 064 return true; 065 } 066 067 // anything else outside the range is just plain wrong. 068 if (ch >= 127 || ch < 32) { 069 return false; 070 } 071 return true; 072 } 073 074 075 /** 076 * Examine a stream of text and make a judgement on what encoding 077 * type should be used for the text. Ideally, we want to use 7bit 078 * encoding to determine this, but we may need to use either quoted-printable 079 * or base64. The choice is made on the ratio of 7-bit characters to non-7bit. 080 * 081 * @param content An input stream for the content we're examining. 082 * 083 * @exception IOException 084 */ 085 public static String getTextTransferEncoding(InputStream content) throws IOException { 086 087 // for efficiency, we'll read in blocks. 088 BufferedInputStream in = new BufferedInputStream(content, 4096); 089 090 int span = 0; // span of characters without a line break. 091 boolean containsLongLines = false; 092 int asciiChars = 0; 093 int nonAsciiChars = 0; 094 095 while (true) { 096 int ch = in.read(); 097 // if we hit an EOF here, go decide what type we've actually found. 098 if (ch == -1) { 099 break; 100 } 101 102 // we found a linebreak. Reset the line length counters on either one. We don't 103 // really need to validate here. 104 if (ch == '\n' || ch == '\r') { 105 // hit a line end, reset our line length counter 106 span = 0; 107 } 108 else { 109 span++; 110 // the text has long lines, we can't transfer this as unencoded text. 111 if (span > 998) { 112 containsLongLines = true; 113 } 114 115 // non-ascii character, we have to transfer this in binary. 116 if (!isAscii(ch)) { 117 nonAsciiChars++; 118 } 119 else { 120 asciiChars++; 121 } 122 } 123 } 124 125 // looking good so far, only valid chars here. 126 if (nonAsciiChars == 0) { 127 // does this contain long text lines? We need to use a Q-P encoding which will 128 // be only slightly longer, but handles folding the longer lines. 129 if (containsLongLines) { 130 return "quoted-printable"; 131 } 132 else { 133 // ideal! Easiest one to handle. 134 return "7bit"; 135 } 136 } 137 else { 138 // mostly characters requiring encoding? Base64 is our best bet. 139 if (nonAsciiChars > asciiChars) { 140 return "base64"; 141 } 142 else { 143 // Q-P encoding will use fewer bytes than the full Base64. 144 return "quoted-printable"; 145 } 146 } 147 } 148 149 150 /** 151 * Examine a stream of text and make a judgement on what encoding 152 * type should be used for the text. Ideally, we want to use 7bit 153 * encoding to determine this, but we may need to use either quoted-printable 154 * or base64. The choice is made on the ratio of 7-bit characters to non-7bit. 155 * 156 * @param content A string for the content we're examining. 157 */ 158 public static String getTextTransferEncoding(String content) { 159 160 int asciiChars = 0; 161 int nonAsciiChars = 0; 162 163 for (int i = 0; i < content.length(); i++) { 164 int ch = content.charAt(i); 165 166 // non-ascii character, we have to transfer this in binary. 167 if (!isAscii(ch)) { 168 nonAsciiChars++; 169 } 170 else { 171 asciiChars++; 172 } 173 } 174 175 // looking good so far, only valid chars here. 176 if (nonAsciiChars == 0) { 177 // ideal! Easiest one to handle. 178 return "7bit"; 179 } 180 else { 181 // mostly characters requiring encoding? Base64 is our best bet. 182 if (nonAsciiChars > asciiChars) { 183 return "base64"; 184 } 185 else { 186 // Q-P encoding will use fewer bytes than the full Base64. 187 return "quoted-printable"; 188 } 189 } 190 } 191 192 193 /** 194 * Determine if the transfer encoding looks like it might be 195 * valid ascii text, and thus transferable as 7bit code. In 196 * order for this to be true, all characters must be valid 197 * 7-bit ASCII code AND all line breaks must be properly formed 198 * (JUST '\r\n' sequences). 7-bit transfers also 199 * typically have a line limit of 1000 bytes (998 + the CRLF), so any 200 * stretch of charactes longer than that will also force Base64 encoding. 201 * 202 * @param content An input stream for the content we're examining. 203 * 204 * @exception IOException 205 */ 206 public static String getBinaryTransferEncoding(InputStream content) throws IOException { 207 208 // for efficiency, we'll read in blocks. 209 BufferedInputStream in = new BufferedInputStream(content, 4096); 210 211 int previousChar = 0; 212 int span = 0; // span of characters without a line break. 213 214 while (true) { 215 int ch = in.read(); 216 // if we hit an EOF here, we've only found valid text so far, so we can transfer this as 217 // 7-bit ascii. 218 if (ch == -1) { 219 return "7bit"; 220 } 221 222 // we found a newline, this is only valid if the previous char was the '\r' 223 if (ch == '\n') { 224 // malformed linebreak? force this to base64 encoding. 225 if (previousChar != '\r') { 226 return "base64"; 227 } 228 // hit a line end, reset our line length counter 229 span = 0; 230 } 231 else { 232 span++; 233 // the text has long lines, we can't transfer this as unencoded text. 234 if (span > 998) { 235 return "base64"; 236 } 237 238 // non-ascii character, we have to transfer this in binary. 239 if (!isAscii(ch)) { 240 return "base64"; 241 } 242 } 243 previousChar = ch; 244 } 245 } 246 }