XMLEncodingDetector xref

View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   * ====================================================================
17   *
18   * This software consists of voluntary contributions made by many
19   * individuals on behalf of the Apache Software Foundation and was
20   * originally based on software copyright (c) 1999, International
21   * Business Machines, Inc., http://www.apache.org.  For more
22   * information on the Apache Software Foundation, please see
23   * <http://www.apache.org/>.
24   */
25  
26  package org.apache.jasper.xmlparser;
27  
28  import java.io.EOFException;
29  import java.io.InputStream;
30  import java.io.InputStreamReader;
31  import java.io.IOException;
32  import java.io.Reader;
33  import java.util.Locale;
34  import java.util.jar.JarFile;
35  
36  import org.apache.jasper.JasperException;
37  import org.apache.jasper.JspCompilationContext;
38  import org.apache.jasper.compiler.ErrorDispatcher;
39  import org.apache.jasper.compiler.JspUtil;
40  
41  public class XMLEncodingDetector {
42      
43      private InputStream stream; // input being examined; createInitialReader() wraps it in a RewindableInputStream
        // Encoding name; createInitialReader() fills it in from the leading
        // bytes of the stream. Returned as element 0 by getEncoding().
44      private String encoding;
        // Returned as element 1 by getEncoding(). It is not assigned in the part
        // of the class visible here (presumably set while scanning the XML
        // declaration -- TODO confirm).
45      private boolean isEncodingSetInProlog;
        // Set by createInitialReader(); returned as element 2 by getEncoding().
46      private boolean isBomPresent;
        // Byte count taken from the getEncodingName() descriptor; returned as
        // element 3 by getEncoding().
47      private int skip;
        // Byte order from the getEncodingName() descriptor; null means unknown
        // or not relevant.
48      private Boolean isBigEndian;
        // Reader produced by createReader() for the detected encoding.
49      private Reader reader;
50      
51      // org.apache.xerces.impl.XMLEntityManager fields
52      public static final int DEFAULT_BUFFER_SIZE = 2048;
53      public static final int DEFAULT_XMLDECL_BUFFER_SIZE = 64;
        // Consulted by createReader() when validating/mapping encoding names;
        // never assigned in the code visible here, so it keeps its default (false)
        // unless set elsewhere.
54      private boolean fAllowJavaEncodings;
        // Created in the constructor; scanName() interns names through it.
55      private SymbolTable fSymbolTable;
        // The constructor points this at the detector itself.
56      private XMLEncodingDetector fCurrentEntity;
        // Buffer size handed to the UTF-8/ASCII readers; scanName() doubles it
        // when a single name fills the whole character buffer.
57      private int fBufferSize = DEFAULT_BUFFER_SIZE;
58      
59      // org.apache.xerces.impl.XMLEntityManager.ScannedEntity fields
        // Line/column bookkeeping maintained by the scan*/skip* methods.
60      private int lineNumber = 1;
61      private int columnNumber = 1;
        // Read by scanLiteral() when deciding whether a quote ends the value;
        // not assigned in the code visible here.
62      private boolean literal;
        // Character buffer: 'position' indexes the next char to consume and
        // the scanners call load() once position reaches 'count'.
63      private char[] ch = new char[DEFAULT_BUFFER_SIZE];
64      private int position;
65      private int count;
        // Not referenced in the code visible here.
66      private boolean mayReadChunks = false;
67      
68      // org.apache.xerces.impl.XMLScanner fields
        // The scanner fields below are not referenced in the code visible here
        // (presumably used while scanning the XML declaration -- TODO confirm).
69      private XMLString fString = new XMLString();    
70      private XMLStringBuffer fStringBuffer = new XMLStringBuffer();
71      private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
72      private final static String fVersionSymbol = "version";
73      private final static String fEncodingSymbol = "encoding";
74      private final static String fStandaloneSymbol = "standalone";
75      
76      // org.apache.xerces.impl.XMLDocumentFragmentScannerImpl fields
77      private int fMarkupDepth = 0;
78      private String[] fStrings = new String[3];
79  
        // Error sink supplied to getEncoding(); createReader() reports
        // unsupported or invalid encodings through it.
80      private ErrorDispatcher err;
81  
/**
 * Creates a detector with its own symbol table. The detector acts as its
 * own "current entity" for the scanning methods.
 */
public XMLEncodingDetector() {
    this.fCurrentEntity = this;
    this.fSymbolTable = new SymbolTable();
}
89  
90      /**
91       * Autodetects the encoding of the XML document supplied by the given
92       * input stream.
93       *
94       * Encoding autodetection is done according to the XML 1.0 specification,
95       * Appendix F.1: Detection Without External Encoding Information.
96       *
97       * @return Two-element array, where the first element (of type
98       * java.lang.String) contains the name of the (auto)detected encoding, and
99       * the second element (of type java.lang.Boolean) specifies whether the 
100      * encoding was specified using the 'encoding' attribute of an XML prolog
101      * (TRUE) or autodetected (FALSE).
102      */
103     public static Object[] getEncoding(String fname, JarFile jarFile,
104                                        JspCompilationContext ctxt,
105                                        ErrorDispatcher err)
106         throws IOException, JasperException
107     {
108         InputStream inStream = JspUtil.getInputStream(fname, jarFile, ctxt,
109                                                       err);
110         XMLEncodingDetector detector = new XMLEncodingDetector();
111         Object[] ret = detector.getEncoding(inStream, err);
112         inStream.close();
113 
114         return ret;
115     }
116 
117     private Object[] getEncoding(InputStream in, ErrorDispatcher err)
118         throws IOException, JasperException
119     {
120         this.stream = in;
121         this.err=err;
122         createInitialReader();
123         scanXMLDecl();
124 	
125         return new Object[] { this.encoding,
126                               Boolean.valueOf(this.isEncodingSetInProlog),
127                               Boolean.valueOf(this.isBomPresent),
128                               Integer.valueOf(this.skip) };
129     }
130     
131     // stub method
132     void endEntity() {
133     }
134     
135     // Adapted from:
136     // org.apache.xerces.impl.XMLEntityManager.startEntity()
137     private void createInitialReader() throws IOException, JasperException {
138 
139 	// wrap this stream in RewindableInputStream
140 	stream = new RewindableInputStream(stream);
141 
142 	// perform auto-detect of encoding if necessary
143 	if (encoding == null) {
144 	    // read first four bytes and determine encoding
145 	    final byte[] b4 = new byte[4];
146 	    int count = 0;
147 	    for (; count<4; count++ ) {
148 		b4[count] = (byte)stream.read();
149 	    }
150 	    if (count == 4) {
151 		Object [] encodingDesc = getEncodingName(b4, count);
152 		encoding = (String)(encodingDesc[0]);
153 		isBigEndian = (Boolean)(encodingDesc[1]);
154         
155         if (encodingDesc.length > 3) {
156             isBomPresent = (Boolean)(encodingDesc[2]);
157             skip = (Integer)(encodingDesc[3]);
158         } else {
159             isBomPresent = true;
160             skip = (Integer)(encodingDesc[2]);
161         }
162 
163 		stream.reset();
164 		// Special case UTF-8 files with BOM created by Microsoft
165 		// tools. It's more efficient to consume the BOM than make
166 		// the reader perform extra checks. -Ac
167 		if (count > 2 && encoding.equals("UTF-8")) {
168 		    int b0 = b4[0] & 0xFF;
169 		    int b1 = b4[1] & 0xFF;
170 		    int b2 = b4[2] & 0xFF;
171 		    if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
172 			// ignore first three bytes...
173 			stream.skip(3);
174 		    }
175 		}
176 		reader = createReader(stream, encoding, isBigEndian);
177 	    } else {
178 		reader = createReader(stream, encoding, isBigEndian);
179 	    }
180 	}
181     }
182 
183     // Adapted from:
184     // org.apache.xerces.impl.XMLEntityManager.createReader
185     /**
186      * Creates a reader capable of reading the given input stream in
187      * the specified encoding.
188      *
189      * @param inputStream  The input stream.
190      * @param encoding     The encoding name that the input stream is
191      *                     encoded using. If the user has specified that
192      *                     Java encoding names are allowed, then the
193      *                     encoding name may be a Java encoding name;
194      *                     otherwise, it is an ianaEncoding name.
195      * @param isBigEndian   For encodings (like uCS-4), whose names cannot
196      *                      specify a byte order, this tells whether the order
197      *                      is bigEndian. null means unknown or not relevant.
198      *
199      * @return Returns a reader.
200      */
201     private Reader createReader(InputStream inputStream, String encoding,
202 				Boolean isBigEndian)
203                 throws IOException, JasperException {
204 
205         // normalize encoding name
206         if (encoding == null) {
207             encoding = "UTF-8";
208         }
209 
210         // try to use an optimized reader
211         String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
212         if (ENCODING.equals("UTF-8")) {
213             return new UTF8Reader(inputStream, fBufferSize);
214         }
215         if (ENCODING.equals("US-ASCII")) {
216             return new ASCIIReader(inputStream, fBufferSize);
217         }
218         if (ENCODING.equals("ISO-10646-UCS-4")) {
219             if (isBigEndian != null) {
220                 boolean isBE = isBigEndian.booleanValue();
221                 if (isBE) {
222                     return new UCSReader(inputStream, UCSReader.UCS4BE);
223                 } else {
224                     return new UCSReader(inputStream, UCSReader.UCS4LE);
225                 }
226             } else {
227                 err.jspError("jsp.error.xml.encodingByteOrderUnsupported",
228 			     encoding);
229             }
230         }
231         if (ENCODING.equals("ISO-10646-UCS-2")) {
232             if (isBigEndian != null) { // sould never happen with this encoding...
233                 boolean isBE = isBigEndian.booleanValue();
234                 if (isBE) {
235                     return new UCSReader(inputStream, UCSReader.UCS2BE);
236                 } else {
237                     return new UCSReader(inputStream, UCSReader.UCS2LE);
238                 }
239             } else {
240                 err.jspError("jsp.error.xml.encodingByteOrderUnsupported",
241 			     encoding);
242             }
243         }
244 
245         // check for valid name
246         boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
247         boolean validJava = XMLChar.isValidJavaEncoding(encoding);
248         if (!validIANA || (fAllowJavaEncodings && !validJava)) {
249             err.jspError("jsp.error.xml.encodingDeclInvalid", encoding);
250             // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
251             //       because every byte is a valid ISO Latin 1 character.
252             //       It may not translate correctly but if we failed on
253             //       the encoding anyway, then we're expecting the content
254             //       of the document to be bad. This will just prevent an
255             //       invalid UTF-8 sequence to be detected. This is only
256             //       important when continue-after-fatal-error is turned
257             //       on. -Ac
258             encoding = "ISO-8859-1";
259         }
260 
261         // try to use a Java reader
262         String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
263         if (javaEncoding == null) {
264             if (fAllowJavaEncodings) {
265 		javaEncoding = encoding;
266             } else {
267                 err.jspError("jsp.error.xml.encodingDeclInvalid", encoding);
268                 // see comment above.
269                 javaEncoding = "ISO8859_1";
270             }
271         }
272         return new InputStreamReader(inputStream, javaEncoding);
273 
274     } // createReader(InputStream,String, Boolean): Reader
275 
276     // Adapted from:
277     // org.apache.xerces.impl.XMLEntityManager.getEncodingName
278     /**
279      * Returns the IANA encoding name that is auto-detected from
280      * the bytes specified, with the endian-ness of that encoding where
281      * appropriate.
282      *
283      * @param b4    The first four bytes of the input.
284      * @param count The number of bytes actually read.
285      * @return a 2-element array:  the first element, an IANA-encoding string,
286      *  the second element a Boolean which is true iff the document is big
287      *  endian, false if it's little-endian, and null if the distinction isn't
288      *  relevant.
289      */
290     private Object[] getEncodingName(byte[] b4, int count) {
291 
292         if (count < 2) {
293             return new Object[]{"UTF-8", null, Boolean.FALSE, Integer.valueOf(0)};
294         }
295 
296         // UTF-16, with BOM
297         int b0 = b4[0] & 0xFF;
298         int b1 = b4[1] & 0xFF;
299         if (b0 == 0xFE && b1 == 0xFF) {
300             // UTF-16, big-endian
301             return new Object [] {"UTF-16BE", Boolean.TRUE, Integer.valueOf(2)};
302         }
303         if (b0 == 0xFF && b1 == 0xFE) {
304             // UTF-16, little-endian
305             return new Object [] {"UTF-16LE", Boolean.FALSE, Integer.valueOf(2)};
306         }
307 
308         // default to UTF-8 if we don't have enough bytes to make a
309         // good determination of the encoding
310         if (count < 3) {
311             return new Object [] {"UTF-8", null, Boolean.FALSE, Integer.valueOf(0)};
312         }
313 
314         // UTF-8 with a BOM
315         int b2 = b4[2] & 0xFF;
316         if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
317             return new Object [] {"UTF-8", null, Integer.valueOf(3)};
318         }
319 
320         // default to UTF-8 if we don't have enough bytes to make a
321         // good determination of the encoding
322         if (count < 4) {
323             return new Object [] {"UTF-8", null, Integer.valueOf(0)};
324         }
325 
326         // other encodings
327         int b3 = b4[3] & 0xFF;
328         if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
329             // UCS-4, big endian (1234)
330             return new Object [] {"ISO-10646-UCS-4", new Boolean(true), Integer.valueOf(4)};
331         }
332         if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
333             // UCS-4, little endian (4321)
334             return new Object [] {"ISO-10646-UCS-4", new Boolean(false), Integer.valueOf(4)};
335         }
336         if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
337             // UCS-4, unusual octet order (2143)
338             // REVISIT: What should this be?
339             return new Object [] {"ISO-10646-UCS-4", null, Integer.valueOf(4)};
340         }
341         if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
342             // UCS-4, unusual octect order (3412)
343             // REVISIT: What should this be?
344             return new Object [] {"ISO-10646-UCS-4", null, Integer.valueOf(4)};
345         }
346         if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
347             // UTF-16, big-endian, no BOM
348             // (or could turn out to be UCS-2...
349             // REVISIT: What should this be?
350             return new Object [] {"UTF-16BE", new Boolean(true), Integer.valueOf(4)};
351         }
352         if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
353             // UTF-16, little-endian, no BOM
354             // (or could turn out to be UCS-2...
355             return new Object [] {"UTF-16LE", new Boolean(false), Integer.valueOf(4)};
356         }
357         if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
358             // EBCDIC
359             // a la xerces1, return CP037 instead of EBCDIC here
360             return new Object [] {"CP037", null, Integer.valueOf(4)};
361         }
362 
363         // default encoding
364         return new Object [] {"UTF-8", null, Boolean.FALSE, Integer.valueOf(0)};
365 
366     }
367 
368     // Adapted from:
369     // org.apache.xerces.impl.XMLEntityManager.EntityScanner.isExternal
370     /** Returns true if the current entity being scanned is external. */
371     public boolean isExternal() {
372 	return true;
373     }
374 
375     // Adapted from:
376     // org.apache.xerces.impl.XMLEntityManager.EntityScanner.peekChar
377     /**
378      * Returns the next character on the input.
379      * <p>
380      * <strong>Note:</strong> The character is <em>not</em> consumed.
381      *
382      * @throws IOException  Thrown if i/o error occurs.
383      * @throws EOFException Thrown on end of file.
384      */
385     public int peekChar() throws IOException {
386 	
387 	// load more characters, if needed
388 	if (fCurrentEntity.position == fCurrentEntity.count) {
389 	    load(0, true);
390 	}
391 	
392 	// peek at character
393 	int c = fCurrentEntity.ch[fCurrentEntity.position];
394 
395 	// return peeked character
396 	if (fCurrentEntity.isExternal()) {
397 	    return c != '\r' ? c : '\n';
398 	}
399 	else {
400 	    return c;
401 	}
402 	
403     } // peekChar():int
404     
405     // Adapted from:
406     // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanChar
407     /**
408      * Returns the next character on the input.
409      * <p>
410      * <strong>Note:</strong> The character is consumed.
411      *
412      * @throws IOException  Thrown if i/o error occurs.
413      * @throws EOFException Thrown on end of file.
414      */
415     public int scanChar() throws IOException {
416 
417 	// load more characters, if needed
418 	if (fCurrentEntity.position == fCurrentEntity.count) {
419 	    load(0, true);
420 	}
421 
422 	// scan character
423 	int c = fCurrentEntity.ch[fCurrentEntity.position++];
424 	boolean external = false;
425 	if (c == '\n' ||
426 	    (c == '\r' && (external = fCurrentEntity.isExternal()))) {
427 	    fCurrentEntity.lineNumber++;
428 	    fCurrentEntity.columnNumber = 1;
429 	    if (fCurrentEntity.position == fCurrentEntity.count) {
430 		fCurrentEntity.ch[0] = (char)c;
431 		load(1, false);
432 	    }
433 	    if (c == '\r' && external) {
434 		if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
435 		    fCurrentEntity.position--;
436 		}
437 		c = '\n';
438 	    }
439 	}
440 
441 	// return character that was scanned
442 	fCurrentEntity.columnNumber++;
443 	return c;
444 	
445     }
446 
447     // Adapted from:
448     // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanName
449     /**
450      * Returns a string matching the Name production appearing immediately
451      * on the input as a symbol, or null if no Name string is present.
452      * <p>
453      * <strong>Note:</strong> The Name characters are consumed.
454      * <p>
455      * <strong>Note:</strong> The string returned must be a symbol. The
456      * SymbolTable can be used for this purpose.
457      *
458      * @throws IOException  Thrown if i/o error occurs.
459      * @throws EOFException Thrown on end of file.
460      *
461      * @see SymbolTable
462      * @see XMLChar#isName
463      * @see XMLChar#isNameStart
464      */
465     public String scanName() throws IOException {
        // Scans a Name starting at the current buffer position. 'offset'
        // tracks where the name begins inside the character buffer.
        // NOTE(review): load() is defined outside this view; the comments
        // below assume load(n, changeEntity) reads new characters into the
        // buffer starting at index n and returns true when the entity
        // changed -- confirm against its definition.
466 	
467 	// load more characters, if needed
468 	if (fCurrentEntity.position == fCurrentEntity.count) {
469 	    load(0, true);
470 	}
471 	
472 	// scan name
473 	int offset = fCurrentEntity.position;
474 	if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
475 	    if (++fCurrentEntity.position == fCurrentEntity.count) {
                // The name-start character was the last one buffered: keep
                // it at index 0 and load the following input behind it.
476 		fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
477 		offset = 0;
478 		if (load(1, false)) {
                    // load() reported true: the name is just this one
                    // character.
479 		    fCurrentEntity.columnNumber++;
480 		    String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch,
481 							   0, 1);
482 		    return symbol;
483 		}
484 	    }
485 	    while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
486 		if (++fCurrentEntity.position == fCurrentEntity.count) {
                    // Ran off the buffered data in the middle of a name:
                    // move the part scanned so far to the front (growing
                    // the buffer when the name alone fills it), then load
                    // more input after it.
487 		    int length = fCurrentEntity.position - offset;
488 		    if (length == fBufferSize) {
489 			// bad luck we have to resize our buffer
490 			char[] tmp = new char[fBufferSize * 2];
491 			System.arraycopy(fCurrentEntity.ch, offset,
492 					 tmp, 0, length);
493 			fCurrentEntity.ch = tmp;
494 			fBufferSize *= 2;
495 		    } else {
496 			System.arraycopy(fCurrentEntity.ch, offset,
497 					 fCurrentEntity.ch, 0, length);
498 		    }
499 		    offset = 0;
500 		    if (load(length, false)) {
501 			break;
502 		    }
503 		}
504 	    }
505 	}
        // Everything between offset and position is the name; a length of
        // zero means no name-start character was found.
506 	int length = fCurrentEntity.position - offset;
507 	fCurrentEntity.columnNumber += length;
508 
509 	// return name
510 	String symbol = null;
511 	if (length > 0) {
512 	    symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
513 	}
514 	return symbol;
515 	
516     }
517 
518     // Adapted from:
519     // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanLiteral
520     /**
521      * Scans a range of attribute value data, setting the fields of the
522      * XMLString structure, appropriately.
523      * <p>
524      * <strong>Note:</strong> The characters are consumed.
525      * <p>
526      * <strong>Note:</strong> This method does not guarantee to return
527      * the longest run of attribute value data. This method may return
528      * before the quote character due to reaching the end of the input
529      * buffer or any other reason.
530      * <p>
531      * <strong>Note:</strong> The fields contained in the XMLString
532      * structure are not guaranteed to remain valid upon subsequent calls
533      * to the entity scanner. Therefore, the caller is responsible for
534      * immediately using the returned character data or making a copy of
535      * the character data.
536      *
537      * @param quote   The quote character that signifies the end of the
538      *                attribute value data.
539      * @param content The content structure to fill.
540      *
541      * @return Returns the next character on the input, if known. This
542      *         value may be -1 but this does <em>not</em> designate
543      *         end of file.
544      *
545      * @throws IOException  Thrown if i/o error occurs.
546      * @throws EOFException Thrown on end of file.
547      */
548     public int scanLiteral(int quote, XMLString content)
549 	throws IOException {
        // Scans one run of attribute-value characters into 'content' and
        // returns the character that follows it, or -1 when that is not
        // known (or must not be revealed, see the end of the method).
        // NOTE(review): load() is defined outside this view; the comments
        // below assume load(n, changeEntity) reads new characters into the
        // buffer starting at index n and returns true when the entity
        // changed -- confirm against its definition.
550 
551 	// load more characters, if needed
552 	if (fCurrentEntity.position == fCurrentEntity.count) {
553 	    load(0, true);
554 	} else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
            // Exactly one character is left: move it to the front of the
            // buffer and load more input behind it.
555 	    fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
556 	    load(1, false);
557 	    fCurrentEntity.position = 0;
558 	}
559 
560 	// normalize newlines
        // 'offset' marks the start of the data handed to 'content';
        // 'newlines' counts the line-break characters seen in the leading run.
561 	int offset = fCurrentEntity.position;
562 	int c = fCurrentEntity.ch[offset];
563 	int newlines = 0;
564 	boolean external = fCurrentEntity.isExternal();
565 	if (c == '\n' || (c == '\r' && external)) {
566 	    do {
567 		c = fCurrentEntity.ch[fCurrentEntity.position++];
568 		if (c == '\r' && external) {
569 		    newlines++;
570 		    fCurrentEntity.lineNumber++;
571 		    fCurrentEntity.columnNumber = 1;
572 		    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // Buffer exhausted inside the run: restart at the
                        // front, leaving 'newlines' slots for the line
                        // breaks already counted.
573 			offset = 0;
574 			fCurrentEntity.position = newlines;
575 			if (load(newlines, false)) {
576 			    break;
577 			}
578 		    }
579 		    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                        // "\r\n" counts as one line break: step over the
                        // '\n' and move offset so the pair yields a single
                        // character.
580 			fCurrentEntity.position++;
581 			offset++;
582 		    }
583 		    /*** NEWLINE NORMALIZATION ***/
584 		    else {
585 			newlines++;
586 		    }
587 		    /***/
588 		}
589 		else if (c == '\n') {
590 		    newlines++;
591 		    fCurrentEntity.lineNumber++;
592 		    fCurrentEntity.columnNumber = 1;
593 		    if (fCurrentEntity.position == fCurrentEntity.count) {
594 			offset = 0;
595 			fCurrentEntity.position = newlines;
596 			if (load(newlines, false)) {
597 			    break;
598 			}
599 		    }
600 		    /*** NEWLINE NORMALIZATION ***
601 			 if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
602 			 && external) {
603 			 fCurrentEntity.position++;
604 			 offset++;
605 			 }
606 			 /***/
607 		}
608 		else {
                    // first non-line-break character: un-read it and stop
609 		    fCurrentEntity.position--;
610 		    break;
611 		}
612 	    } while (fCurrentEntity.position < fCurrentEntity.count - 1);
            // rewrite the whole run as '\n' characters, in place
613 	    for (int i = offset; i < fCurrentEntity.position; i++) {
614 		fCurrentEntity.ch[i] = '\n';
615 	    }
616 	    int length = fCurrentEntity.position - offset;
617 	    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                // The run reached the last buffered character: hand back
                // only the normalized line breaks; the next character is
                // reported as unknown.
618 		content.setValues(fCurrentEntity.ch, offset, length);
619 		return -1;
620 	    }
621 	}
622 
623 	// scan literal value
        // Stops, without consuming the character, at the quote (unless this
        // entity is flagged 'literal' and is not external), at '%', or at
        // anything XMLChar.isContent() rejects.
624 	while (fCurrentEntity.position < fCurrentEntity.count) {
625 	    c = fCurrentEntity.ch[fCurrentEntity.position++];
626 	    if ((c == quote &&
627 		 (!fCurrentEntity.literal || external))
628 		|| c == '%' || !XMLChar.isContent(c)) {
629 		fCurrentEntity.position--;
630 		break;
631 	    }
632 	}
633 	int length = fCurrentEntity.position - offset;
634 	fCurrentEntity.columnNumber += length - newlines;
635 	content.setValues(fCurrentEntity.ch, offset, length);
636 
637 	// return next character
638 	if (fCurrentEntity.position != fCurrentEntity.count) {
639 	    c = fCurrentEntity.ch[fCurrentEntity.position];
640 	    // NOTE: We don't want to accidentally signal the
641 	    //       end of the literal if we're expanding an
642 	    //       entity appearing in the literal. -Ac
643 	    if (c == quote && fCurrentEntity.literal) {
644 		c = -1;
645 	    }
646 	}
647 	else {
            // buffer fully consumed: the next character is not known yet
648 	    c = -1;
649 	}
650 	return c;
651 
652     }
653 
654     /**
655      * Scans a range of character data up to the specified delimiter,
656      * setting the fields of the XMLString structure, appropriately.
657      * <p>
658      * <strong>Note:</strong> The characters are consumed.
659      * <p>
660      * <strong>Note:</strong> This assumes that the internal buffer is
661      * at least the same size, or bigger, than the length of the delimiter
662      * and that the delimiter contains at least one character.
663      * <p>
664      * <strong>Note:</strong> This method does not guarantee to return
665      * the longest run of character data. This method may return before
666      * the delimiter due to reaching the end of the input buffer or any
667      * other reason.
668      * <p>
669      * <strong>Note:</strong> The fields contained in the XMLString
670      * structure are not guaranteed to remain valid upon subsequent calls
671      * to the entity scanner. Therefore, the caller is responsible for
672      * immediately using the returned character data or making a copy of
673      * the character data.
674      *
675      * @param delimiter The string that signifies the end of the character
676      *                  data to be scanned.
677      * @param buffer    The data structure to fill.
678      *
679      * @return Returns true if there is more data to scan, false otherwise.
680      *
681      * @throws IOException  Thrown if i/o error occurs.
682      * @throws EOFException Thrown on end of file.
683      */
684     public boolean scanData(String delimiter, XMLStringBuffer buffer)
685 	throws IOException {
        // Appends character data to 'buffer' until 'delimiter' is found,
        // working through the character buffer one stretch at a time.
        // NOTE(review): load() is defined outside this view; the comments
        // below assume load(n, changeEntity) reads new characters into the
        // buffer starting at index n and returns true when the entity
        // changed -- confirm against its definition.
686 
687 	boolean done = false;
688 	int delimLen = delimiter.length();
689 	char charAt0 = delimiter.charAt(0);
690 	boolean external = fCurrentEntity.isExternal();
691 	do {
692     
693 	    // load more characters, if needed
694     
695 	    if (fCurrentEntity.position == fCurrentEntity.count) {
696 		load(0, true);
697 	    }
698 	    else if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) {
                // No more than delimLen characters remain: move them to the
                // front of the buffer and load more input behind them.
699 		System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position,
700 				 fCurrentEntity.ch, 0, fCurrentEntity.count - fCurrentEntity.position);
701 		load(fCurrentEntity.count - fCurrentEntity.position, false);
702 		fCurrentEntity.position = 0;
703 	    } 
704 	    if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) {
705 		// something must be wrong with the input: e.g., file ends an
706 		// unterminated comment
                // Flush whatever is left into 'buffer' and give up.
707 		int length = fCurrentEntity.count - fCurrentEntity.position;
708 		buffer.append (fCurrentEntity.ch, fCurrentEntity.position,
709 			       length); 
                // NOTE(review): the column advances by 'count' here rather
                // than by 'length' -- confirm this is intended.
710 		fCurrentEntity.columnNumber += fCurrentEntity.count;
711 		fCurrentEntity.position = fCurrentEntity.count;
712 		load(0,true);
713 		return false;
714 	    }
715     
716 	    // normalize newlines
            // 'offset' marks the start of the stretch appended to 'buffer';
            // 'newlines' counts the line-break characters in the leading run.
717 	    int offset = fCurrentEntity.position;
718 	    int c = fCurrentEntity.ch[offset];
719 	    int newlines = 0;
720 	    if (c == '\n' || (c == '\r' && external)) {
721 		do {
722 		    c = fCurrentEntity.ch[fCurrentEntity.position++];
723 		    if (c == '\r' && external) {
724 			newlines++;
725 			fCurrentEntity.lineNumber++;
726 			fCurrentEntity.columnNumber = 1;
727 			if (fCurrentEntity.position == fCurrentEntity.count) {
                            // Buffer exhausted inside the run: restart at
                            // the front, leaving 'newlines' slots for the
                            // line breaks already counted.
728 			    offset = 0;
729 			    fCurrentEntity.position = newlines;
730 			    if (load(newlines, false)) {
731 				break;
732 			    }
733 			}
734 			if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                            // "\r\n" counts as one line break
735 			    fCurrentEntity.position++;
736 			    offset++;
737 			}
738 			/*** NEWLINE NORMALIZATION ***/
739 			else {
740 			    newlines++;
741 			}
742 		    }
743 		    else if (c == '\n') {
744 			newlines++;
745 			fCurrentEntity.lineNumber++;
746 			fCurrentEntity.columnNumber = 1;
747 			if (fCurrentEntity.position == fCurrentEntity.count) {
748 			    offset = 0;
749 			    fCurrentEntity.position = newlines;
                            // NOTE(review): only this branch also resets
                            // 'count'; the '\r' branch above does not --
                            // confirm the asymmetry is intended.
750 			    fCurrentEntity.count = newlines;
751 			    if (load(newlines, false)) {
752 				break;
753 			    }
754 			}
755 		    }
756 		    else {
                        // first non-line-break character: un-read it and stop
757 			fCurrentEntity.position--;
758 			break;
759 		    }
760 		} while (fCurrentEntity.position < fCurrentEntity.count - 1);
                // rewrite the whole run as '\n' characters, in place
761 		for (int i = offset; i < fCurrentEntity.position; i++) {
762 		    fCurrentEntity.ch[i] = '\n';
763 		}
764 		int length = fCurrentEntity.position - offset;
765 		if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                    // The run reached the last buffered character: append
                    // the normalized line breaks and let the caller call
                    // again for the rest.
766 		    buffer.append(fCurrentEntity.ch, offset, length);
767 		    return true;
768 		}
769 	    }
770     
771 	    // iterate over buffer looking for delimiter
772 	OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
773 	    c = fCurrentEntity.ch[fCurrentEntity.position++];
774 	    if (c == charAt0) {
775 		// looks like we just hit the delimiter
776 		int delimOffset = fCurrentEntity.position - 1;
777 		for (int i = 1; i < delimLen; i++) {
778 		    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // The candidate runs past the buffered data: back
                        // up to its first character and leave the scan
                        // loop so the next pass can load more input.
779 			fCurrentEntity.position -= i;
780 			break OUTER;
781 		    }
782 		    c = fCurrentEntity.ch[fCurrentEntity.position++];
783 		    if (delimiter.charAt(i) != c) {
                        // mismatch: un-read this character and keep scanning
784 			fCurrentEntity.position--;
785 			break;
786 		    }
787 		}
                // the whole delimiter matched only if position advanced
                // exactly delimLen characters past its first character
788 		if (fCurrentEntity.position == delimOffset + delimLen) {
789 		    done = true;
790 		    break;
791 		}
792 	    }
793 	    else if (c == '\n' || (external && c == '\r')) {
                // a line break ends this stretch; the next pass of the outer
                // loop normalizes it
794 		fCurrentEntity.position--;
795 		break;
796 	    }
797 	    else if (XMLChar.isInvalid(c)) {
                // stop in front of the invalid character and hand back what
                // was scanned so far
798 		fCurrentEntity.position--;
799 		int length = fCurrentEntity.position - offset;
800 		fCurrentEntity.columnNumber += length - newlines;
801 		buffer.append(fCurrentEntity.ch, offset, length); 
802 		return true;
803 	    }
804 	}
805 	    int length = fCurrentEntity.position - offset;
806 	    fCurrentEntity.columnNumber += length - newlines;
807 	    if (done) {
                // the delimiter itself is not part of the data
808 		length -= delimLen;
809 	    }
810 	    buffer.append (fCurrentEntity.ch, offset, length);
811     
812 	    // return true if string was skipped
813 	} while (!done);
        // The loop above is only left with done == true (every other exit
        // is an explicit return), so this returns false.
814 	return !done;
815 
816     }
817 
818     // Adapted from:
819     // org.apache.xerces.impl.XMLEntityManager.EntityScanner.skipChar
820     /**
821      * Skips a character appearing immediately on the input.
822      * <p>
823      * <strong>Note:</strong> The character is consumed only if it matches
824      * the specified character.
825      *
826      * @param c The character to skip.
827      *
828      * @return Returns true if the character was skipped.
829      *
830      * @throws IOException  Thrown if i/o error occurs.
831      * @throws EOFException Thrown on end of file.
832      */
833     public boolean skipChar(int c) throws IOException {
834 
835 	// load more characters, if needed
836 	if (fCurrentEntity.position == fCurrentEntity.count) {
837 	    load(0, true);
838 	}
839 
840 	// skip character
841 	int cc = fCurrentEntity.ch[fCurrentEntity.position];
842 	if (cc == c) {
843 	    fCurrentEntity.position++;
844 	    if (c == '\n') {
845 		fCurrentEntity.lineNumber++;
846 		fCurrentEntity.columnNumber = 1;
847 	    }
848 	    else {
849 		fCurrentEntity.columnNumber++;
850 	    }
851 	    return true;
852 	} else if (c == '\n' && cc == '\r' && fCurrentEntity.isExternal()) {
853 	    // handle newlines
854 	    if (fCurrentEntity.position == fCurrentEntity.count) {
855 		fCurrentEntity.ch[0] = (char)cc;
856 		load(1, false);
857 	    }
858 	    fCurrentEntity.position++;
859 	    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
860 		fCurrentEntity.position++;
861 	    }
862 	    fCurrentEntity.lineNumber++;
863 	    fCurrentEntity.columnNumber = 1;
864 	    return true;
865 	}
866 
867 	// character was not skipped
868 	return false;
869 
870     }
871 
872     // Adapted from:
873     // org.apache.xerces.impl.XMLEntityManager.EntityScanner.skipSpaces
874     /**
875      * Skips space characters appearing immediately on the input.
876      * <p>
877      * <strong>Note:</strong> The characters are consumed only if they are
878      * space characters.
879      *
880      * @return Returns true if at least one space character was skipped.
881      *
882      * @throws IOException  Thrown if i/o error occurs.
883      * @throws EOFException Thrown on end of file.
884      *
885      * @see XMLChar#isSpace
886      */
887     public boolean skipSpaces() throws IOException {
888 
889 	// load more characters, if needed
890 	if (fCurrentEntity.position == fCurrentEntity.count) {
891 	    load(0, true);
892 	}
893 
894 	// skip spaces
895 	int c = fCurrentEntity.ch[fCurrentEntity.position];
896 	if (XMLChar.isSpace(c)) {
897 	    boolean external = fCurrentEntity.isExternal();
898 	    do {
899 		boolean entityChanged = false;
900 		// handle newlines
901 		if (c == '\n' || (external && c == '\r')) {
902 		    fCurrentEntity.lineNumber++;
903 		    fCurrentEntity.columnNumber = 1;
904 		    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
905 			fCurrentEntity.ch[0] = (char)c;
906 			entityChanged = load(1, true);
907 			if (!entityChanged)
908                                 // the load change the position to be 1,
909                                 // need to restore it when entity not changed
910 			    fCurrentEntity.position = 0;
911 		    }
912 		    if (c == '\r' && external) {
913 			// REVISIT: Does this need to be updated to fix the
914 			//          #x0D ^#x0A newline normalization problem? -Ac
915 			if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
916 			    fCurrentEntity.position--;
917 			}
918 		    }
919 		    /*** NEWLINE NORMALIZATION ***
920 			 else {
921 			 if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
922 			 && external) {
923 			 fCurrentEntity.position++;
924 			 }
925 			 }
926 			 /***/
927 		}
928 		else {
929 		    fCurrentEntity.columnNumber++;
930 		}
931 		// load more characters, if needed
932 		if (!entityChanged)
933 		    fCurrentEntity.position++;
934 		if (fCurrentEntity.position == fCurrentEntity.count) {
935 		    load(0, true);
936 		}
937 	    } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
938 	    return true;
939 	}
940 
941 	// no spaces were found
942 	return false;
943 
944     }
945 
946     /**
947      * Skips the specified string appearing immediately on the input.
948      * <p>
949      * <strong>Note:</strong> The characters are consumed only if they are
950      * space characters.
951      *
952      * @param s The string to skip.
953      *
954      * @return Returns true if the string was skipped.
955      *
956      * @throws IOException  Thrown if i/o error occurs.
957      * @throws EOFException Thrown on end of file.
958      */
959     public boolean skipString(String s) throws IOException {
960 
961 	// load more characters, if needed
962 	if (fCurrentEntity.position == fCurrentEntity.count) {
963 	    load(0, true);
964 	}
965 
966 	// skip string
967 	final int length = s.length();
968 	for (int i = 0; i < length; i++) {
969 	    char c = fCurrentEntity.ch[fCurrentEntity.position++];
970 	    if (c != s.charAt(i)) {
971 		fCurrentEntity.position -= i + 1;
972 		return false;
973 	    }
974 	    if (i < length - 1 && fCurrentEntity.position == fCurrentEntity.count) {
975 		System.arraycopy(fCurrentEntity.ch, fCurrentEntity.count - i - 1, fCurrentEntity.ch, 0, i + 1);
976 		// REVISIT: Can a string to be skipped cross an
977 		//          entity boundary? -Ac
978 		if (load(i + 1, false)) {
979 		    fCurrentEntity.position -= i + 1;
980 		    return false;
981 		}
982 	    }
983 	}
984 	fCurrentEntity.columnNumber += length;
985 	return true;
986 
987     }
988 
989     // Adapted from:
990     // org.apache.xerces.impl.XMLEntityManager.EntityScanner.load
991     /**
992      * Loads a chunk of text.
993      *
994      * @param offset       The offset into the character buffer to
995      *                     read the next batch of characters.
996      * @param changeEntity True if the load should change entities
997      *                     at the end of the entity, otherwise leave
998      *                     the current entity in place and the entity
999      *                     boundary will be signaled by the return
1000      *                     value.
1001      *
1002      * @returns Returns true if the entity changed as a result of this
1003      *          load operation.
1004      */
1005     final boolean load(int offset, boolean changeEntity)
1006 	throws IOException {
1007 
1008 	// read characters
1009 	int length = fCurrentEntity.mayReadChunks?
1010 	    (fCurrentEntity.ch.length - offset):
1011 	    (DEFAULT_XMLDECL_BUFFER_SIZE);
1012 	int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset,
1013 					       length);
1014 
1015 	// reset count and position
1016 	boolean entityChanged = false;
1017 	if (count != -1) {
1018 	    if (count != 0) {
1019 		fCurrentEntity.count = count + offset;
1020 		fCurrentEntity.position = offset;
1021 	    }
1022 	}
1023 
1024 	// end of this entity
1025 	else {
1026 	    fCurrentEntity.count = offset;
1027 	    fCurrentEntity.position = offset;
1028 	    entityChanged = true;
1029 	    if (changeEntity) {
1030 		endEntity();
1031 		if (fCurrentEntity == null) {
1032 		    throw new EOFException();
1033 		}
1034 		// handle the trailing edges
1035 		if (fCurrentEntity.position == fCurrentEntity.count) {
1036 		    load(0, false);
1037 		}
1038 	    }
1039 	}
1040 
1041 	return entityChanged;
1042 
1043     }
1044 
1045     // Adapted from:
1046     // org.apache.xerces.impl.XMLEntityManager.RewindableInputStream
1047     /**
1048      * This class wraps the byte inputstreams we're presented with.
1049      * We need it because java.io.InputStreams don't provide
1050      * functionality to reread processed bytes, and they have a habit
1051      * of reading more than one character when you call their read()
1052      * methods.  This means that, once we discover the true (declared)
1053      * encoding of a document, we can neither backtrack to read the
1054      * whole doc again nor start reading where we are with a new
1055      * reader.
1056      *
1057      * This class allows rewinding an inputStream by allowing a mark
1058      * to be set, and the stream reset to that position.  <strong>The
1059      * class assumes that it needs to read one character per
1060      * invocation when it's read() method is inovked, but uses the
1061      * underlying InputStream's read(char[], offset length) method--it
1062      * won't buffer data read this way!</strong>
1063      *
1064      * @author Neil Graham, IBM
1065      * @author Glenn Marcy, IBM
1066      */
1067     private final class RewindableInputStream extends InputStream {
1068 
1069         private InputStream fInputStream;
1070         private byte[] fData;
1071         private int fStartOffset;
1072         private int fEndOffset;
1073         private int fOffset;
1074         private int fLength;
1075         private int fMark;
1076 
1077         public RewindableInputStream(InputStream is) {
1078             fData = new byte[DEFAULT_XMLDECL_BUFFER_SIZE];
1079             fInputStream = is;
1080             fStartOffset = 0;
1081             fEndOffset = -1;
1082             fOffset = 0;
1083             fLength = 0;
1084             fMark = 0;
1085         }
1086 
1087         public void setStartOffset(int offset) {
1088             fStartOffset = offset;
1089         }
1090 
1091         public void rewind() {
1092             fOffset = fStartOffset;
1093         }
1094 
1095         public int read() throws IOException {
1096             int b = 0;
1097             if (fOffset < fLength) {
1098                 return fData[fOffset++] & 0xff;
1099             }
1100             if (fOffset == fEndOffset) {
1101                 return -1;
1102             }
1103             if (fOffset == fData.length) {
1104                 byte[] newData = new byte[fOffset << 1];
1105                 System.arraycopy(fData, 0, newData, 0, fOffset);
1106                 fData = newData;
1107             }
1108             b = fInputStream.read();
1109             if (b == -1) {
1110                 fEndOffset = fOffset;
1111                 return -1;
1112             }
1113             fData[fLength++] = (byte)b;
1114             fOffset++;
1115             return b & 0xff;
1116         }
1117 
1118         public int read(byte[] b, int off, int len) throws IOException {
1119             int bytesLeft = fLength - fOffset;
1120             if (bytesLeft == 0) {
1121                 if (fOffset == fEndOffset) {
1122                     return -1;
1123                 }
1124                 // better get some more for the voracious reader...
1125                 if (fCurrentEntity.mayReadChunks) {
1126                     return fInputStream.read(b, off, len);
1127                 }
1128                 int returnedVal = read();
1129                 if (returnedVal == -1) {
1130                     fEndOffset = fOffset;
1131                     return -1;
1132                 }
1133                 b[off] = (byte)returnedVal;
1134                 return 1;
1135             }
1136             if (len < bytesLeft) {
1137                 if (len <= 0) {
1138                     return 0;
1139                 }
1140             }
1141             else {
1142                 len = bytesLeft;
1143             }
1144             if (b != null) {
1145                 System.arraycopy(fData, fOffset, b, off, len);
1146             }
1147             fOffset += len;
1148             return len;
1149         }
1150 
1151         public long skip(long n)
1152             throws IOException
1153         {
1154             int bytesLeft;
1155             if (n <= 0) {
1156                 return 0;
1157             }
1158             bytesLeft = fLength - fOffset;
1159             if (bytesLeft == 0) {
1160                 if (fOffset == fEndOffset) {
1161                     return 0;
1162                 }
1163                 return fInputStream.skip(n);
1164             }
1165             if (n <= bytesLeft) {
1166                 fOffset += n;
1167                 return n;
1168             }
1169             fOffset += bytesLeft;
1170             if (fOffset == fEndOffset) {
1171                 return bytesLeft;
1172             }
1173             n -= bytesLeft;
1174 	    /*
1175 	     * In a manner of speaking, when this class isn't permitting more
1176 	     * than one byte at a time to be read, it is "blocking".  The
1177 	     * available() method should indicate how much can be read without
1178 	     * blocking, so while we're in this mode, it should only indicate
1179 	     * that bytes in its buffer are available; otherwise, the result of
1180 	     * available() on the underlying InputStream is appropriate.
1181 	     */
1182             return fInputStream.skip(n) + bytesLeft;
1183         }
1184 
1185         public int available() throws IOException {
1186             int bytesLeft = fLength - fOffset;
1187             if (bytesLeft == 0) {
1188                 if (fOffset == fEndOffset) {
1189                     return -1;
1190                 }
1191                 return fCurrentEntity.mayReadChunks ? fInputStream.available()
1192 		    : 0;
1193             }
1194             return bytesLeft;
1195         }
1196 
1197         public void mark(int howMuch) {
1198             fMark = fOffset;
1199         }
1200 
1201         public void reset() {
1202             fOffset = fMark;
1203         }
1204 
1205         public boolean markSupported() {
1206             return true;
1207         }
1208 
1209         public void close() throws IOException {
1210             if (fInputStream != null) {
1211                 fInputStream.close();
1212                 fInputStream = null;
1213             }
1214         }
1215     } // end of RewindableInputStream class
1216 
1217     // Adapted from:
1218     // org.apache.xerces.impl.XMLDocumentScannerImpl.dispatch
1219     private void scanXMLDecl() throws IOException, JasperException {
1220 
1221 	if (skipString("<?xml")) {
1222 	    fMarkupDepth++;
1223 	    // NOTE: special case where document starts with a PI
1224 	    //       whose name starts with "xml" (e.g. "xmlfoo")
1225 	    if (XMLChar.isName(peekChar())) {
1226 		fStringBuffer.clear();
1227 		fStringBuffer.append("xml");
1228 		while (XMLChar.isName(peekChar())) {
1229 		    fStringBuffer.append((char)scanChar());
1230 		}
1231 		String target = fSymbolTable.addSymbol(fStringBuffer.ch,
1232 						       fStringBuffer.offset,
1233 						       fStringBuffer.length);
1234 		scanPIData(target, fString);
1235 	    }
1236 
1237 	    // standard XML declaration
1238 	    else {
1239 		scanXMLDeclOrTextDecl(false);
1240 	    }
1241 	}
1242     }
1243     
1244     // Adapted from:
1245     // org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanXMLDeclOrTextDecl
1246     /**
1247      * Scans an XML or text declaration.
1248      * <p>
1249      * <pre>
1250      * [23] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1251      * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
1252      * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
1253      * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1254      * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
1255      *                 | ('"' ('yes' | 'no') '"'))
1256      *
1257      * [77] TextDecl ::= '&lt;?xml' VersionInfo? EncodingDecl S? '?>'
1258      * </pre>
1259      *
1260      * @param scanningTextDecl True if a text declaration is to
1261      *                         be scanned instead of an XML
1262      *                         declaration.
1263      */
1264     private void scanXMLDeclOrTextDecl(boolean scanningTextDecl) 
1265         throws IOException, JasperException {
1266 
1267         // scan decl
1268         scanXMLDeclOrTextDecl(scanningTextDecl, fStrings);
1269         fMarkupDepth--;
1270 
1271         // pseudo-attribute values
1272         String encodingPseudoAttr = fStrings[1];
1273 
1274         // set encoding on reader
1275         if (encodingPseudoAttr != null) {
1276             isEncodingSetInProlog = true;
1277 	    encoding = encodingPseudoAttr;
1278         }
1279     }
1280 
1281     // Adapted from:
1282     // org.apache.xerces.impl.XMLScanner.scanXMLDeclOrTextDecl
1283     /**
1284      * Scans an XML or text declaration.
1285      * <p>
1286      * <pre>
1287      * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1288      * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
1289      * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
1290      * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1291      * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
1292      *                 | ('"' ('yes' | 'no') '"'))
1293      *
1294      * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1295      * </pre>
1296      *
1297      * @param scanningTextDecl True if a text declaration is to
1298      *                         be scanned instead of an XML
1299      *                         declaration.
1300      * @param pseudoAttributeValues An array of size 3 to return the version,
1301      *                         encoding and standalone pseudo attribute values
1302      *                         (in that order).
1303      *
1304      * <strong>Note:</strong> This method uses fString, anything in it
1305      * at the time of calling is lost.
1306      */
1307     private void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
1308 				       String[] pseudoAttributeValues) 
1309                 throws IOException, JasperException {
1310 
1311         // pseudo-attribute values
1312         String version = null;
1313         String encoding = null;
1314         String standalone = null;
1315 
1316         // scan pseudo-attributes
1317         final int STATE_VERSION = 0;
1318         final int STATE_ENCODING = 1;
1319         final int STATE_STANDALONE = 2;
1320         final int STATE_DONE = 3;
1321         int state = STATE_VERSION;
1322 
1323         boolean dataFoundForTarget = false;
1324         boolean sawSpace = skipSpaces();
1325         while (peekChar() != '?') {
1326             dataFoundForTarget = true;
1327             String name = scanPseudoAttribute(scanningTextDecl, fString);
1328             switch (state) {
1329                 case STATE_VERSION: {
1330                     if (name == fVersionSymbol) {
1331                         if (!sawSpace) {
1332                             reportFatalError(scanningTextDecl
1333                                        ? "jsp.error.xml.spaceRequiredBeforeVersionInTextDecl"
1334                                        : "jsp.error.xml.spaceRequiredBeforeVersionInXMLDecl",
1335                                              null);
1336                         }
1337                         version = fString.toString();
1338                         state = STATE_ENCODING;
1339                         if (!version.equals("1.0")) {
1340                             // REVISIT: XML REC says we should throw an error
1341 			    // in such cases.
1342                             // some may object the throwing of fatalError.
1343                             err.jspError("jsp.error.xml.versionNotSupported",
1344 					 version);
1345                         }
1346                     } else if (name == fEncodingSymbol) {
1347                         if (!scanningTextDecl) {
1348                             err.jspError("jsp.error.xml.versionInfoRequired");
1349                         }
1350                         if (!sawSpace) {
1351                             reportFatalError(scanningTextDecl
1352                                       ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
1353                                       : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
1354                                              null);
1355                         }
1356                         encoding = fString.toString();
1357                         state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
1358                     } else {
1359                         if (scanningTextDecl) {
1360                             err.jspError("jsp.error.xml.encodingDeclRequired");
1361                         }
1362                         else {
1363                             err.jspError("jsp.error.xml.versionInfoRequired");
1364                         }
1365                     }
1366                     break;
1367                 }
1368                 case STATE_ENCODING: {
1369                     if (name == fEncodingSymbol) {
1370                         if (!sawSpace) {
1371                             reportFatalError(scanningTextDecl
1372                                       ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
1373                                       : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
1374                                              null);
1375                         }
1376                         encoding = fString.toString();
1377                         state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
1378                         // TODO: check encoding name; set encoding on
1379                         //       entity scanner
1380                     } else if (!scanningTextDecl && name == fStandaloneSymbol) {
1381                         if (!sawSpace) {
1382                             err.jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
1383                         }
1384                         standalone = fString.toString();
1385                         state = STATE_DONE;
1386                         if (!standalone.equals("yes") && !standalone.equals("no")) {
1387                             err.jspError("jsp.error.xml.sdDeclInvalid");
1388                         }
1389                     } else {
1390                         err.jspError("jsp.error.xml.encodingDeclRequired");
1391                     }
1392                     break;
1393                 }
1394                 case STATE_STANDALONE: {
1395                     if (name == fStandaloneSymbol) {
1396                         if (!sawSpace) {
1397                             err.jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
1398                         }
1399                         standalone = fString.toString();
1400                         state = STATE_DONE;
1401                         if (!standalone.equals("yes") && !standalone.equals("no")) {
1402                             err.jspError("jsp.error.xml.sdDeclInvalid");
1403                         }
1404                     } else {
1405 			err.jspError("jsp.error.xml.encodingDeclRequired");
1406                     }
1407                     break;
1408                 }
1409                 default: {
1410                     err.jspError("jsp.error.xml.noMorePseudoAttributes");
1411                 }
1412             }
1413             sawSpace = skipSpaces();
1414         }
1415         // REVISIT: should we remove this error reporting?
1416         if (scanningTextDecl && state != STATE_DONE) {
1417             err.jspError("jsp.error.xml.morePseudoAttributes");
1418         }
1419         
1420         // If there is no data in the xml or text decl then we fail to report
1421 	// error for version or encoding info above.
1422         if (scanningTextDecl) {
1423             if (!dataFoundForTarget && encoding == null) {
1424                 err.jspError("jsp.error.xml.encodingDeclRequired");
1425             }
1426         } else {
1427             if (!dataFoundForTarget && version == null) {
1428                 err.jspError("jsp.error.xml.versionInfoRequired");
1429             }
1430         }
1431 
1432         // end
1433         if (!skipChar('?')) {
1434             err.jspError("jsp.error.xml.xmlDeclUnterminated");
1435         }
1436         if (!skipChar('>')) {
1437             err.jspError("jsp.error.xml.xmlDeclUnterminated");
1438 
1439         }
1440         
1441         // fill in return array
1442         pseudoAttributeValues[0] = version;
1443         pseudoAttributeValues[1] = encoding;
1444         pseudoAttributeValues[2] = standalone;
1445     }
1446 
1447     // Adapted from:
1448     // org.apache.xerces.impl.XMLScanner.scanPseudoAttribute
1449     /**
1450      * Scans a pseudo attribute.
1451      *
1452      * @param scanningTextDecl True if scanning this pseudo-attribute for a
1453      *                         TextDecl; false if scanning XMLDecl. This 
1454      *                         flag is needed to report the correct type of
1455      *                         error.
1456      * @param value            The string to fill in with the attribute 
1457      *                         value.
1458      *
1459      * @return The name of the attribute
1460      *
1461      * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
1462      * at the time of calling is lost.
1463      */
1464     public String scanPseudoAttribute(boolean scanningTextDecl, 
1465                                       XMLString value) 
1466                 throws IOException, JasperException {
1467 
1468         String name = scanName();
1469         if (name == null) {
1470             err.jspError("jsp.error.xml.pseudoAttrNameExpected");
1471         }
1472         skipSpaces();
1473         if (!skipChar('=')) {
1474             reportFatalError(scanningTextDecl ?
1475 			     "jsp.error.xml.eqRequiredInTextDecl"
1476                              : "jsp.error.xml.eqRequiredInXMLDecl",
1477 			     name);
1478         }
1479         skipSpaces();
1480         int quote = peekChar();
1481         if (quote != '\'' && quote != '"') {
1482             reportFatalError(scanningTextDecl ?
1483 			     "jsp.error.xml.quoteRequiredInTextDecl"
1484                              : "jsp.error.xml.quoteRequiredInXMLDecl" ,
1485 			     name);
1486         }
1487         scanChar();
1488         int c = scanLiteral(quote, value);
1489         if (c != quote) {
1490             fStringBuffer2.clear();
1491             do {
1492                 fStringBuffer2.append(value);
1493                 if (c != -1) {
1494                     if (c == '&' || c == '%' || c == '<' || c == ']') {
1495                         fStringBuffer2.append((char)scanChar());
1496                     }
1497                     else if (XMLChar.isHighSurrogate(c)) {
1498                         scanSurrogates(fStringBuffer2);
1499                     }
1500                     else if (XMLChar.isInvalid(c)) {
1501                         String key = scanningTextDecl
1502                             ? "jsp.error.xml.invalidCharInTextDecl"
1503 			    : "jsp.error.xml.invalidCharInXMLDecl";
1504                         reportFatalError(key, Integer.toString(c, 16));
1505                         scanChar();
1506                     }
1507                 }
1508                 c = scanLiteral(quote, value);
1509             } while (c != quote);
1510             fStringBuffer2.append(value);
1511             value.setValues(fStringBuffer2);
1512         }
1513         if (!skipChar(quote)) {
1514             reportFatalError(scanningTextDecl ?
1515 			     "jsp.error.xml.closeQuoteMissingInTextDecl"
1516                              : "jsp.error.xml.closeQuoteMissingInXMLDecl",
1517 			     name);
1518         }
1519 
1520         // return
1521         return name;
1522 
1523     }
1524     
1525     // Adapted from:
1526     // org.apache.xerces.impl.XMLScanner.scanPIData
1527     /**
1528      * Scans a processing data. This is needed to handle the situation
1529      * where a document starts with a processing instruction whose 
1530      * target name <em>starts with</em> "xml". (e.g. xmlfoo)
1531      *
1532      * <strong>Note:</strong> This method uses fStringBuffer, anything in it
1533      * at the time of calling is lost.
1534      *
1535      * @param target The PI target
1536      * @param data The string to fill in with the data
1537      */
1538     private void scanPIData(String target, XMLString data) 
1539         throws IOException, JasperException {
1540 
1541         // check target
1542         if (target.length() == 3) {
1543             char c0 = Character.toLowerCase(target.charAt(0));
1544             char c1 = Character.toLowerCase(target.charAt(1));
1545             char c2 = Character.toLowerCase(target.charAt(2));
1546             if (c0 == 'x' && c1 == 'm' && c2 == 'l') {
1547                 err.jspError("jsp.error.xml.reservedPITarget");
1548             }
1549         }
1550 
1551         // spaces
1552         if (!skipSpaces()) {
1553             if (skipString("?>")) {
1554                 // we found the end, there is no data
1555                 data.clear();
1556                 return;
1557             }
1558             else {
1559                 // if there is data there should be some space
1560                 err.jspError("jsp.error.xml.spaceRequiredInPI");
1561             }
1562         }
1563 
1564         fStringBuffer.clear();
1565         // data
1566         if (scanData("?>", fStringBuffer)) {
1567             do {
1568                 int c = peekChar();
1569                 if (c != -1) {
1570                     if (XMLChar.isHighSurrogate(c)) {
1571                         scanSurrogates(fStringBuffer);
1572                     } else if (XMLChar.isInvalid(c)) {
1573                         err.jspError("jsp.error.xml.invalidCharInPI",
1574 				     Integer.toHexString(c));
1575                         scanChar();
1576                     }
1577                 }
1578             } while (scanData("?>", fStringBuffer));
1579         }
1580         data.setValues(fStringBuffer);
1581 
1582     }
1583 
1584     // Adapted from:
1585     // org.apache.xerces.impl.XMLScanner.scanSurrogates
1586     /**
1587      * Scans surrogates and append them to the specified buffer.
1588      * <p>
1589      * <strong>Note:</strong> This assumes the current char has already been
1590      * identified as a high surrogate.
1591      *
1592      * @param buf The StringBuffer to append the read surrogates to.
1593      * @returns True if it succeeded.
1594      */
1595     private boolean scanSurrogates(XMLStringBuffer buf)
1596         throws IOException, JasperException {
1597 
1598         int high = scanChar();
1599         int low = peekChar();
1600         if (!XMLChar.isLowSurrogate(low)) {
1601             err.jspError("jsp.error.xml.invalidCharInContent",
1602 			 Integer.toString(high, 16));
1603             return false;
1604         }
1605         scanChar();
1606 
1607         // convert surrogates to supplemental character
1608         int c = XMLChar.supplemental((char)high, (char)low);
1609 
1610         // supplemental character must be a valid XML character
1611         if (!XMLChar.isValid(c)) {
1612             err.jspError("jsp.error.xml.invalidCharInContent",
1613 			 Integer.toString(c, 16)); 
1614             return false;
1615         }
1616 
1617         // fill in the buffer
1618         buf.append((char)high);
1619         buf.append((char)low);
1620 
1621         return true;
1622 
1623     }
1624 
1625     // Adapted from:
1626     // org.apache.xerces.impl.XMLScanner.reportFatalError
1627     /**
1628      * Convenience function used in all XML scanners.
1629      */
1630     private void reportFatalError(String msgId, String arg)
1631                 throws JasperException {
1632         err.jspError(msgId, arg);
1633     }
1634 
1635 }
1636 
1637