1 /**
2 *
3 * Copyright 2003-2006 The Apache Software Foundation
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package javax.mail.internet;
19
20 /**
21 * @version $Rev: 421852 $ $Date: 2006-07-14 03:02:19 -0700 (Fri, 14 Jul 2006) $
22 */
23 public class HeaderTokenizer {
24 public static class Token {
25
26 public static final int ATOM = -1;
27 public static final int COMMENT = -3;
28 public static final int EOF = -4;
29 public static final int QUOTEDSTRING = -2;
30 private int _type;
31 private String _value;
32
33 public Token(int type, String value) {
34 _type = type;
35 _value = value;
36 }
37
38 public int getType() {
39 return _type;
40 }
41
42 public String getValue() {
43 return _value;
44 }
45 }
46
47 private static final Token EOF = new Token(Token.EOF, null);
48
49 public static final String MIME = "()<>@,;:\\\"\t []/?=";
50
51 public static final String RFC822 = "()<>@,;:\\\"\t .[]";
52 private static final String WHITE = " \t\n\r";
53 private String _delimiters;
54 private String _header;
55 private boolean _skip;
56 private int pos;
57
58 public HeaderTokenizer(String header) {
59 this(header, RFC822);
60 }
61
62 public HeaderTokenizer(String header, String delimiters) {
63 this(header, delimiters, true);
64 }
65
66 public HeaderTokenizer(String header,
67 String delimiters,
68 boolean skipComments) {
69 _skip = skipComments;
70 _header = header;
71 _delimiters = delimiters;
72 }
73
74 public String getRemainder() {
75 return _header.substring(pos);
76 }
77
78 public Token next() throws ParseException {
79 return readToken();
80 }
81
82 public Token peek() throws ParseException {
83 int start = pos;
84 try {
85 return readToken();
86 } finally {
87 pos = start;
88 }
89 }
90
91 /**
92 * Read an ATOM token from the parsed header.
93 *
94 * @return A token containing the value of the atom token.
95 */
96 private Token readAtomicToken() {
97
98 int start = pos;
99 while (++pos < _header.length()) {
100
101 char ch = _header.charAt(pos);
102 if (_delimiters.indexOf(_header.charAt(pos)) != -1 || ch < 32 || ch >= 127) {
103 break;
104 }
105 }
106
107 return new Token(Token.ATOM, _header.substring(start, pos));
108 }
109
110 /**
111 * Read the next token from the header.
112 *
113 * @return The next token from the header. White space is skipped, and comment
114 * tokens are also skipped if indicated.
115 * @exception ParseException
116 */
117 private Token readToken() throws ParseException {
118 if (pos >= _header.length()) {
119 return EOF;
120 } else {
121 char c = _header.charAt(pos);
122
123 if (c == '(') {
124 Token comment = readComment();
125 if (_skip) {
126 return readToken();
127 } else {
128 return comment;
129 }
130
131 } else if (c == '\"') {
132 return readQuotedString();
133
134 } else if (WHITE.indexOf(c) != -1) {
135 eatWhiteSpace();
136 return readToken();
137
138 } else if (c < 32 || c >= 127 || _delimiters.indexOf(c) != -1) {
139 pos++;
140 return new Token((int)c, String.valueOf(c));
141 } else {
142
143 return readAtomicToken();
144 }
145 }
146 }
147
148 /**
149 * Extract a substring from the header string and apply any
150 * escaping/folding rules to the string.
151 *
152 * @param start The starting offset in the header.
153 * @param end The header end offset + 1.
154 *
155 * @return The processed string value.
156 * @exception ParseException
157 */
158 private String getEscapedValue(int start, int end) throws ParseException {
159 StringBuffer value = new StringBuffer();
160
161 for (int i = start; i < end; i++) {
162 char ch = _header.charAt(i);
163
164 if (ch == '\\') {
165 i++;
166 if (i == end) {
167 throw new ParseException("Invalid escape character");
168 }
169 value.append(_header.charAt(i));
170 }
171
172
173 else if (ch == '\r') {
174
175 if (i < end - 1 && _header.charAt(i + 1) == '\n') {
176 i++;
177 }
178 }
179 else {
180
181 value.append(ch);
182 }
183 }
184 return value.toString();
185 }
186
187 /**
188 * Read a comment from the header, applying nesting and escape
189 * rules to the content.
190 *
191 * @return A comment token with the token value.
192 * @exception ParseException
193 */
194 private Token readComment() throws ParseException {
195 int start = pos + 1;
196 int nesting = 1;
197
198 boolean requiresEscaping = false;
199
200
201 while (++pos < _header.length()) {
202 char ch = _header.charAt(pos);
203 if (ch == ')') {
204 nesting--;
205 if (nesting == 0) {
206 break;
207 }
208 }
209 else if (ch == '(') {
210 nesting++;
211 }
212 else if (ch == '\\') {
213 pos++;
214 requiresEscaping = true;
215 }
216
217 else if (ch == '\r') {
218 requiresEscaping = true;
219 }
220 }
221
222 if (nesting != 0) {
223 throw new ParseException("Unbalanced comments");
224 }
225
226 String value;
227 if (requiresEscaping) {
228 value = getEscapedValue(start, pos);
229 }
230 else {
231 value = _header.substring(start, pos++);
232 }
233 return new Token(Token.COMMENT, value);
234 }
235
236 /**
237 * Parse out a quoted string from the header, applying escaping
238 * rules to the value.
239 *
240 * @return The QUOTEDSTRING token with the value.
241 * @exception ParseException
242 */
243 private Token readQuotedString() throws ParseException {
244 int start = pos+1;
245 boolean requiresEscaping = false;
246
247
248 while (++pos < _header.length()) {
249 char ch = _header.charAt(pos);
250 if (ch == '"') {
251 String value;
252 if (requiresEscaping) {
253 value = getEscapedValue(start, pos);
254 }
255 else {
256 value = _header.substring(start, pos++);
257 }
258 return new Token(Token.QUOTEDSTRING, value);
259 }
260 else if (ch == '\\') {
261 pos++;
262 requiresEscaping = true;
263 }
264
265 else if (ch == '\r') {
266 requiresEscaping = true;
267 }
268 }
269
270 throw new ParseException("Missing '\"'");
271 }
272
273 /**
274 * Skip white space in the token string.
275 */
276 private void eatWhiteSpace() {
277
278 while (++pos < _header.length()
279 && WHITE.indexOf(_header.charAt(pos)) != -1)
280 ;
281 }
282 }