BaseMarkupSerializer xref

View Javadoc

1   /**
2    *
3    * Copyright 2006 The Apache Software Foundation
4    *
5    *  Licensed under the Apache License, Version 2.0 (the "License");
6    *  you may not use this file except in compliance with the License.
7    *  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   *  Unless required by applicable law or agreed to in writing, software
12   *  distributed under the License is distributed on an "AS IS" BASIS,
13   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   *  See the License for the specific language governing permissions and
15   *  limitations under the License.
16   */
17  
18  /*
19   * This code has been borrowed from the Apache Xerces project. We're copying the code to
20   * keep from adding a dependency on Xerces in the Geronimo kernel.
21   */
22  
23  package org.apache.geronimo.system.configuration;
24  
25  import java.io.Writer;
26  import java.io.OutputStream;
27  import java.io.IOException;
28  import java.util.Vector;
29  import java.util.Hashtable;
30  
31  import org.w3c.dom.Document;
32  import org.w3c.dom.DocumentFragment;
33  import org.w3c.dom.DocumentType;
34  import org.w3c.dom.Element;
35  import org.w3c.dom.Node;
36  import org.xml.sax.Locator;
37  import org.xml.sax.SAXException;
38  
39  /**
40   * Base class for a serializer supporting both DOM and SAX pretty
41   * serializing of XML/HTML/XHTML documents. Derived classes perform
42   * the method-specific serializing, this class provides the common
43   * serializing mechanisms.
44   * <p>
45   * The serializer must be initialized with the proper writer and
46   * output format before it can be used by calling {@link #init}.
47   * The serializer can be reused any number of times, but cannot
48   * be used concurrently by two threads.
49   * <p>
50   * If an output stream is used, the encoding is taken from the
51   * output format (defaults to <tt>UTF-8</tt>). If a writer is
52   * used, make sure the writer uses the same encoding (if applies)
53   * as specified in the output format.
54   * <p>
55   * The serializer supports both DOM and SAX. DOM serializing is done
56   * by calling {@link #serialize} and SAX serializing is done by firing
57   * SAX events and using the serializer as a document handler.
58   * This also applies to derived class.
59   * <p>
60   * If an I/O exception occurs while serializing, the serializer
61   * will not throw an exception directly, but only throw it
62   * at the end of serializing (either DOM or SAX's {@link
63   * org.xml.sax.DocumentHandler#endDocument}).
64   * <p>
65   * For elements that are not specified as whitespace preserving,
66   * the serializer will potentially break long text lines at space
67   * boundaries, indent lines, and serialize elements on separate
68   * lines. Line terminators will be regarded as spaces, and
69   * spaces at beginning of line will be stripped.
70   * <p>
71   * When indenting, the serializer is capable of detecting seemingly
72   * element content, and serializing these elements indented on separate
73   * lines. An element is serialized indented when it is the first or
74   * last child of an element, or immediate following or preceding
75   * another element.
76   *
77   *
78   * @version $Revision: 410741 $ $Date: 2006-05-31 21:35:48 -0700 (Wed, 31 May 2006) $
79   * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
80   * @see Serializer
81   * @see DOMSerializer
82   */
83  public abstract class BaseMarkupSerializer
84  {
85  
86      private EncodingInfo encodingInfo;
87  
88      /**
89       * Holds array of all element states that have been entered.
90       * The array is automatically resized. When leaving an element,
91       * its state is not removed but reused when later returning
92       * to the same nesting level.
93       */
94      private ElementState[]  elementStates;
95  
96      /**
97       * The index of the next state to place in the array,
98       * or one plus the index of the current state. When zero,
99       * we are in no state.
100      */
101     private int             elementStateCount;
102 
103     /**
104      * Vector holding comments and PIs that come before the root
105      * element (even after it), see {@link #serializePreRoot}.
106      */
107     private Vector          preRoot;
108 
109     /**
110      * If the document has been started (header serialized), this
111      * flag is set to true so it's not started twice.
112      */
113     protected boolean       started;
114 
115     /**
116      * True if the serializer has been prepared. This flag is set
117      * to false when the serializer is reset prior to using it,
118      * and to true after it has been prepared for usage.
119      */
120     private boolean         prepared;
121 
122     /**
123      * Association between namespace URIs (keys) and prefixes (values).
124      * Accumulated here prior to starting an element and placing this
125      * list in the element state.
126      */
127     protected Hashtable     prefixes;
128 
129     /**
130      * The public identifier of the document type, if known.
131      */
132     protected String        docTypePublicId;
133 
134 
135     /**
136      * The system identifier of the document type, if known.
137      */
138     protected String        docTypeSystemId;
139 
140 
141     /**
142      * The output format associated with this serializer. This will never
143      * be a null reference. If no format was passed to the constructor,
144      * the default one for this document type will be used. The format
145      * object is never changed by the serializer.
146      */
147     protected OutputFormat   format;
148 
149 
150     /**
151      * The printer used for printing text parts.
152      */
153     protected Printer       printer;
154 
155 
156     /**
157      * True if indenting printer.
158      */
159     protected boolean       indenting;
160 
161 
162     /**
163      * The underlying writer.
164      */
165     private Writer          writer;
166 
167 
168     /**
169      * The output stream.
170      */
171     private OutputStream    output;
172 
173 
174     //--------------------------------//
175     // Constructor and initialization //
176     //--------------------------------//
177 
178 
179     /**
180      * Protected constructor can only be used by derived class.
181      * Must initialize the serializer before serializing any document,
182      * see {@link #init}.
183      */
184     protected BaseMarkupSerializer( OutputFormat format )
185     {
186         int i;
187 
188         elementStates = new ElementState[ 10 ];
189         for ( i = 0 ; i < elementStates.length ; ++i )
190             elementStates[ i ] = new ElementState();
191         this.format = format;
192     }
193 
194 
195     public void setOutputByteStream( OutputStream output )
196     {
197         if ( output == null )
198             throw new NullPointerException( "SER001 Argument 'output' is null." );
199         this.output = output;
200         writer = null;
201         reset();
202     }
203 
204 
205     public void setOutputCharStream( Writer writer )
206     {
207         if ( writer == null )
208             throw new NullPointerException( "SER001 Argument 'writer' is null." );
209         this.writer = writer;
210         output = null;
211         reset();
212     }
213 
214 
215     public void setOutputFormat( OutputFormat format )
216     {
217         if ( format == null )
218             throw new NullPointerException( "SER001 Argument 'format' is null." );
219         this.format = format;
220         reset();
221     }
222 
223 
224     public boolean reset()
225     {
226         if ( elementStateCount > 1 )
227             throw new IllegalStateException( "Serializer reset in the middle of serialization" );
228         prepared = false;
229         return true;
230     }
231 
232 
233     protected void prepare()
234         throws IOException
235     {
236         if ( prepared )
237             return;
238 
239         if ( writer == null && output == null )
240             throw new IOException( "SER002 No writer supplied for serializer" );
241         // If the output stream has been set, use it to construct
242         // the writer. It is possible that the serializer has been
243         // reused with the same output stream and different encoding.
244 
245         encodingInfo = format.getEncodingInfo();
246 
247         if ( output != null ) {
248             writer = encodingInfo.getWriter(output);
249         }
250 
251         if ( format.getIndenting() ) {
252             indenting = true;
253             printer = new IndentPrinter( writer, format );
254         } else {
255             indenting = false;
256             printer = new Printer( writer, format );
257         }
258 
259         ElementState state;
260 
261         elementStateCount = 0;
262         state = elementStates[ 0 ];
263         state.namespaceURI = null;
264         state.localName = null;
265         state.rawName = null;
266         state.preserveSpace = format.getPreserveSpace();
267         state.empty = true;
268         state.afterElement = false;
269         state.afterComment = false;
270         state.doCData = state.inCData = false;
271         state.prefixes = null;
272 
273         docTypePublicId = format.getDoctypePublic();
274         docTypeSystemId = format.getDoctypeSystem();
275         started = false;
276         prepared = true;
277     }
278 
279 
280 
281     //----------------------------------//
282     // DOM document serializing methods //
283     //----------------------------------//
284 
285 
286     /**
287      * Serializes the DOM element using the previously specified
288      * writer and output format. Throws an exception only if
289      * an I/O exception occured while serializing.
290      *
291      * @param elem The element to serialize
292      * @throws IOException An I/O exception occured while
293      *   serializing
294      */
295     public void serialize( Element elem )
296         throws IOException
297     {
298         prepare();
299         serializeNode( elem );
300         printer.flush();
301         if ( printer.getException() != null )
302             throw printer.getException();
303     }
304 
305 
306     /**
307      * Serializes the DOM document fragmnt using the previously specified
308      * writer and output format. Throws an exception only if
309      * an I/O exception occured while serializing.
310      *
311      * @param elem The element to serialize
312      * @throws IOException An I/O exception occured while
313      *   serializing
314      */
315     public void serialize( DocumentFragment frag )
316         throws IOException
317     {
318         prepare();
319         serializeNode( frag );
320         printer.flush();
321         if ( printer.getException() != null )
322             throw printer.getException();
323     }
324 
325 
326     /**
327      * Serializes the DOM document using the previously specified
328      * writer and output format. Throws an exception only if
329      * an I/O exception occured while serializing.
330      *
331      * @param doc The document to serialize
332      * @throws IOException An I/O exception occured while
333      *   serializing
334      */
335     public void serialize( Document doc )
336         throws IOException
337     {
338         prepare();
339         serializeNode( doc );
340         serializePreRoot();
341         printer.flush();
342         if ( printer.getException() != null )
343             throw printer.getException();
344     }
345 
346 
347     //------------------------------------------//
348     // SAX document handler serializing methods //
349     //------------------------------------------//
350 
351 
352     public void startDocument()
353         throws SAXException
354     {
355         try {
356             prepare();
357         } catch ( IOException except ) {
358             throw new SAXException( except.toString() );
359         }
360         // Nothing to do here. All the magic happens in startDocument(String)
361     }
362     
363     
364     public void characters( char[] chars, int start, int length )
365         throws SAXException
366     {
367         ElementState state;
368 
369         try {
370         state = content();
371 
372         // Check if text should be print as CDATA section or unescaped
373         // based on elements listed in the output format (the element
374         // state) or whether we are inside a CDATA section or entity.
375 
376         if ( state.inCData || state.doCData ) {
377             int          saveIndent;
378 
379             // Print a CDATA section. The text is not escaped, but ']]>'
380             // appearing in the code must be identified and dealt with.
381             // The contents of a text node is considered space preserving.
382             if ( ! state.inCData ) {
383                 printer.printText( "<![CDATA[" );
384                 state.inCData = true;
385             }
386             saveIndent = printer.getNextIndent();
387             printer.setNextIndent( 0 );
388             for ( int index = 0 ; index < length ; ++index ) {
389                 if ( index + 2 < length && chars[ index ] == ']' &&
390                      chars[ index + 1 ] == ']' && chars[ index + 2 ] == '>' ) {
391 
392                     printText( chars, start, index + 2, true, true );
393                     printer.printText( "]]><![CDATA[" );
394                     start += index + 2;
395                     length -= index + 2;
396                     index = 0;
397                 }
398             }
399             if ( length > 0 )
400                 printText( chars, start, length, true, true );
401             printer.setNextIndent( saveIndent );
402 
403         } else {
404 
405             int saveIndent;
406 
407             if ( state.preserveSpace ) {
408                 // If preserving space then hold of indentation so no
409                 // excessive spaces are printed at line breaks, escape
410                 // the text content without replacing spaces and print
411                 // the text breaking only at line breaks.
412                 saveIndent = printer.getNextIndent();
413                 printer.setNextIndent( 0 );
414                 printText( chars, start, length, true, state.unescaped );
415                 printer.setNextIndent( saveIndent );
416             } else {
417                 printText( chars, start, length, false, state.unescaped );
418             }
419         }
420         } catch ( IOException except ) {
421             throw new SAXException( except );
422         }
423     }
424 
425 
426     public void ignorableWhitespace( char[] chars, int start, int length )
427         throws SAXException
428     {
429         int i;
430 
431         try {
432         content();
433 
434         // Print ignorable whitespaces only when indenting, after
435         // all they are indentation. Cancel the indentation to
436         // not indent twice.
437         if ( indenting ) {
438             printer.setThisIndent( 0 );
439             for ( i = start ; length-- > 0 ; ++i )
440                 printer.printText( chars[ i ] );
441         }
442         } catch ( IOException except ) {
443             throw new SAXException( except );
444         }
445     }
446 
447 
448     public final void processingInstruction( String target, String code )
449         throws SAXException
450     {
451         try {
452             processingInstructionIO( target, code );
453         } catch ( IOException except ) {
454         throw new SAXException( except );
455         }
456     }
457 
458     public void processingInstructionIO( String target, String code )
459         throws IOException
460     {
461         int          index;
462         StringBuffer buffer;
463         ElementState state;
464 
465         state = content();
466         buffer = new StringBuffer( 40 );
467 
468         // Create the processing instruction textual representation.
469         // Make sure we don't have '?>' inside either target or code.
470         index = target.indexOf( "?>" );
471         if ( index >= 0 )
472             buffer.append( "<?" ).append( target.substring( 0, index ) );
473         else
474             buffer.append( "<?" ).append( target );
475         if ( code != null ) {
476             buffer.append( ' ' );
477             index = code.indexOf( "?>" );
478             if ( index >= 0 )
479                 buffer.append( code.substring( 0, index ) );
480             else
481                 buffer.append( code );
482         }
483         buffer.append( "?>" );
484 
485         // If before the root element (or after it), do not print
486         // the PI directly but place it in the pre-root vector.
487         if ( isDocumentState() ) {
488             if ( preRoot == null )
489                 preRoot = new Vector();
490             preRoot.addElement( buffer.toString() );
491         } else {
492             printer.indent();
493             printText( buffer.toString(), true, true );
494             printer.unindent();
495             if ( indenting )
496             state.afterElement = true;
497         }
498     }
499 
500 
501     public void comment( char[] chars, int start, int length )
502         throws SAXException
503     {
504         try {
505         comment( new String( chars, start, length ) );
506         } catch ( IOException except ) {
507             throw new SAXException( except );
508     }
509     }
510 
511 
512     public void comment( String text )
513         throws IOException
514     {
515         StringBuffer buffer;
516         int          index;
517         ElementState state;
518         
519         if ( format.getOmitComments() )
520             return;
521 
522         state  = content();
523         buffer = new StringBuffer( 40 );
524         // Create the processing comment textual representation.
525         // Make sure we don't have '-->' inside the comment.
526         index = text.indexOf( "-->" );
527         if ( index >= 0 )
528             buffer.append( "<!--" ).append( text.substring( 0, index ) ).append( "-->" );
529         else
530             buffer.append( "<!--" ).append( text ).append( "-->" );
531 
532         // If before the root element (or after it), do not print
533         // the comment directly but place it in the pre-root vector.
534         if ( isDocumentState() ) {
535             if ( preRoot == null )
536                 preRoot = new Vector();
537             preRoot.addElement( buffer.toString() );
538         } else {
539             // Indent this element on a new line if the first
540             // content of the parent element or immediately
541             // following an element.
542             if ( indenting && ! state.preserveSpace)
543                 printer.breakLine();
544                         printer.indent();
545             printText( buffer.toString(), true, true );
546                         printer.unindent();
547             if ( indenting )
548                 state.afterElement = true;
549         }
550                 state.afterComment = true;
551                 state.afterElement = false;
552     }
553 
554 
555     public void startCDATA()
556     {
557         ElementState state;
558 
559         state = getElementState();
560         state.doCData = true;
561     }
562 
563 
564     public void endCDATA()
565     {
566         ElementState state;
567 
568         state = getElementState();
569         state.doCData = false;
570     }
571 
572 
573     public void startNonEscaping()
574     {
575         ElementState state;
576 
577         state = getElementState();
578         state.unescaped = true;
579     }
580 
581 
582     public void endNonEscaping()
583     {
584         ElementState state;
585 
586         state = getElementState();
587         state.unescaped = false;
588     }
589 
590 
591     public void startPreserving()
592     {
593         ElementState state;
594 
595         state = getElementState();
596         state.preserveSpace = true;
597     }
598 
599 
600     public void endPreserving()
601     {
602         ElementState state;
603 
604         state = getElementState();
605         state.preserveSpace = false;
606     }
607 
608 
609     /**
610      * Called at the end of the document to wrap it up.
611      * Will flush the output stream and throw an exception
612      * if any I/O error occured while serializing.
613      *
614      * @throws SAXException An I/O exception occured during
615      *  serializing
616      */
617     public void endDocument()
618         throws SAXException
619     {
620         try {
621         // Print all the elements accumulated outside of
622         // the root element.
623         serializePreRoot();
624         // Flush the output, this is necessary for buffered output.
625         printer.flush();
626         } catch ( IOException except ) {
627             throw new SAXException( except );
628     }
629     }
630 
631 
632     public void startEntity( String name )
633     {
634         // ???
635     }
636 
637 
638     public void endEntity( String name )
639     {
640         // ???
641     }
642 
643 
644     public void setDocumentLocator( Locator locator )
645     {
646         // Nothing to do
647     }
648 
649 
650     //-----------------------------------------//
651     // SAX content handler serializing methods //
652     //-----------------------------------------//
653 
654 
655     public void skippedEntity ( String name )
656         throws SAXException
657     {
658         try {
659         endCDATA();
660         content();
661         printer.printText( '&' );
662         printer.printText( name );
663         printer.printText( ';' );
664         } catch ( IOException except ) {
665             throw new SAXException( except );
666     }
667     }
668 
669 
670     public void startPrefixMapping( String prefix, String uri )
671         throws SAXException
672     {
673         if ( prefixes == null )
674             prefixes = new Hashtable();
675         prefixes.put( uri, prefix == null ? "" : prefix );
676     }
677 
678 
679     public void endPrefixMapping( String prefix )
680         throws SAXException
681     {
682     }
683 
684 
685     //------------------------------------------//
686     // SAX DTD/Decl handler serializing methods //
687     //------------------------------------------//
688 
689 
690     public final void startDTD( String name, String publicId, String systemId )
691         throws SAXException
692     {
693         try {
694         printer.enterDTD();
695         docTypePublicId = publicId;
696         docTypeSystemId = systemId;
697         } catch ( IOException except ) {
698             throw new SAXException( except );
699         }
700     }
701 
702 
703     public void endDTD()
704     {
705         // Nothing to do here, all the magic occurs in startDocument(String).
706     }
707 
708 
709     public void elementDecl( String name, String model )
710         throws SAXException
711     {
712         try {
713         printer.enterDTD();
714         printer.printText( "<!ELEMENT " );
715         printer.printText( name );
716         printer.printText( ' ' );
717         printer.printText( model );
718         printer.printText( '>' );
719         if ( indenting )
720             printer.breakLine();
721         } catch ( IOException except ) {
722             throw new SAXException( except );
723         }
724     }
725 
726 
727     public void attributeDecl( String eName, String aName, String type,
728                                String valueDefault, String value )
729         throws SAXException
730     {
731         try {
732         printer.enterDTD();
733         printer.printText( "<!ATTLIST " );
734         printer.printText( eName );
735         printer.printText( ' ' );
736         printer.printText( aName );
737         printer.printText( ' ' );
738         printer.printText( type );
739         if ( valueDefault != null ) {
740             printer.printText( ' ' );
741             printer.printText( valueDefault );
742         }
743         if ( value != null ) {
744             printer.printText( " \"" );
745             printEscaped( value );
746             printer.printText( '"' );
747         }
748         printer.printText( '>' );
749         if ( indenting )
750             printer.breakLine();
751         } catch ( IOException except ) {
752             throw new SAXException( except );
753     }
754     }
755 
756 
757     public void internalEntityDecl( String name, String value )
758         throws SAXException
759     {
760         try {
761         printer.enterDTD();
762         printer.printText( "<!ENTITY " );
763         printer.printText( name );
764         printer.printText( " \"" );
765         printEscaped( value );
766         printer.printText( "\">" );
767         if ( indenting )
768             printer.breakLine();
769         } catch ( IOException except ) {
770             throw new SAXException( except );
771         }
772     }
773 
774 
775     public void externalEntityDecl( String name, String publicId, String systemId )
776         throws SAXException
777     {
778         try {
779         printer.enterDTD();
780         unparsedEntityDecl( name, publicId, systemId, null );
781         } catch ( IOException except ) {
782             throw new SAXException( except );
783         }
784     }
785 
786 
787     public void unparsedEntityDecl( String name, String publicId,
788                                     String systemId, String notationName )
789         throws SAXException
790     {
791         try {
792         printer.enterDTD();
793         if ( publicId == null ) {
794             printer.printText( "<!ENTITY " );
795             printer.printText( name );
796             printer.printText( " SYSTEM " );
797             printDoctypeURL( systemId );
798         } else {
799             printer.printText( "<!ENTITY " );
800             printer.printText( name );
801             printer.printText( " PUBLIC " );
802             printDoctypeURL( publicId );
803             printer.printText( ' ' );
804             printDoctypeURL( systemId );
805         }
806         if ( notationName != null ) {
807             printer.printText( " NDATA " );
808             printer.printText( notationName );
809         }
810         printer.printText( '>' );
811         if ( indenting )
812             printer.breakLine();
813         } catch ( IOException except ) {
814             throw new SAXException( except );
815     }
816     }
817 
818 
819     public void notationDecl( String name, String publicId, String systemId )
820         throws SAXException
821     {
822         try {
823         printer.enterDTD();
824         if ( publicId != null ) {
825             printer.printText( "<!NOTATION " );
826             printer.printText( name );
827             printer.printText( " PUBLIC " );
828             printDoctypeURL( publicId );
829             if ( systemId != null ) {
830                 printer.printText( ' ' );
831                 printDoctypeURL( systemId );
832             }
833         } else {
834             printer.printText( "<!NOTATION " );
835             printer.printText( name );
836             printer.printText( " SYSTEM " );
837             printDoctypeURL( systemId );
838         }
839         printer.printText( '>' );
840         if ( indenting )
841             printer.breakLine();
842         } catch ( IOException except ) {
843             throw new SAXException( except );
844         }
845     }
846 
847 
848     //----------------------------------//
849     // Generic node serializing methods //
850     //----------------------------------//
851 
852 
853     /**
854      * Serialize the DOM node. This method is shared across XML, HTML and XHTML
855      * serializers and the differences are masked out in a separate {@link
856      * #serializeElement}.
857      *
858      * @param node The node to serialize
859      * @see #serializeElement
860      * @throws IOException An I/O exception occured while
861      *   serializing
862      */
863     protected void serializeNode( Node node )
864         throws IOException
865     {
866         // Based on the node type call the suitable SAX handler.
867         // Only comments entities and documents which are not
868         // handled by SAX are serialized directly.
869         switch ( node.getNodeType() ) {
870         case Node.TEXT_NODE : {
871             String text;
872 
873             text = node.getNodeValue();
874             if ( text != null )
875                 if ( !indenting || getElementState().preserveSpace
876                      || (text.replace('\n',' ').trim().length() != 0))
877                     characters( text );
878             break;
879         }
880 
881         case Node.CDATA_SECTION_NODE : {
882             String text;
883 
884             text = node.getNodeValue();
885             if ( text != null ) {
886                 startCDATA();
887                 characters( text );
888                 endCDATA();
889             }
890             break;
891         }
892 
893         case Node.COMMENT_NODE : {
894             String text;
895 
896             if ( ! format.getOmitComments() ) {
897                 text = node.getNodeValue();
898                 if ( text != null )
899                     comment( text );
900             }
901             break;
902         }
903 
904         case Node.ENTITY_REFERENCE_NODE : {
905             Node         child;
906 
907             endCDATA();
908             content();
909             child = node.getFirstChild();
910             while ( child != null ) {
911                 serializeNode( child );
912                 child = child.getNextSibling();
913             }
914             break;
915         }
916 
917         case Node.PROCESSING_INSTRUCTION_NODE :
918             processingInstructionIO( node.getNodeName(), node.getNodeValue() );
919             break;
920 
921         case Node.ELEMENT_NODE :
922             serializeElement( (Element) node );
923             break;
924 
925         case Node.DOCUMENT_NODE : {
926             DocumentType      docType;
927 
928             // If there is a document type, use the SAX events to
929             // serialize it.
930             docType = ( (Document) node ).getDoctype();
931             if (docType != null) {
932                 // DOM Level 2 (or higher)
933                 // TODO: result of the following call was assigned to a local variable that was never
934                 // read. Can the call be deleted?
935                 ( (Document) node ).getImplementation();
936                 try {
937                     String internal;
938 
939                     printer.enterDTD();
940                     docTypePublicId = docType.getPublicId();
941                     docTypeSystemId = docType.getSystemId();
942                     internal = docType.getInternalSubset();
943                     if ( internal != null && internal.length() > 0 )
944                         printer.printText( internal );
945                     endDTD();
946                 }
947                 // DOM Level 1 -- does implementation have methods?
948                 catch (NoSuchMethodError nsme) {
949                     Class docTypeClass = docType.getClass();
950 
951                     String docTypePublicId = null;
952                     String docTypeSystemId = null;
953                     try {
954                         java.lang.reflect.Method getPublicId = docTypeClass.getMethod("getPublicId", null);
955                         if (getPublicId.getReturnType().equals(String.class)) {
956                             docTypePublicId = (String)getPublicId.invoke(docType, null);
957                         }
958                     }
959                     catch (Exception e) {
960                         // ignore
961                     }
962                     try {
963                         java.lang.reflect.Method getSystemId = docTypeClass.getMethod("getSystemId", null);
964                         if (getSystemId.getReturnType().equals(String.class)) {
965                             docTypeSystemId = (String)getSystemId.invoke(docType, null);
966                         }
967                     }
968                     catch (Exception e) {
969                         // ignore
970                     }
971                     this.printer.enterDTD();
972                     this.docTypePublicId = docTypePublicId;
973                     this.docTypeSystemId = docTypeSystemId;
974                     endDTD();
975                 }
976             }
977             // !! Fall through
978         }
979         case Node.DOCUMENT_FRAGMENT_NODE : {
980             Node         child;
981 
982             // By definition this will happen if the node is a document,
983             // document fragment, etc. Just serialize its contents. It will
984             // work well for other nodes that we do not know how to serialize.
985             child = node.getFirstChild();
986             while ( child != null ) {
987                 serializeNode( child );
988                 child = child.getNextSibling();
989             }
990             break;
991         }
992 
993         default:
994             break;
995         }
996     }
997 
998 
999     /**
1000      * Must be called by a method about to print any type of content.
1001      * If the element was just opened, the opening tag is closed and
1002      * will be matched to a closing tag. Returns the current element
1003      * state with <tt>empty</tt> and <tt>afterElement</tt> set to false.
1004      *
1005      * @return The current element state
1006      * @throws IOException An I/O exception occured while
1007      *   serializing
1008      */
1009     protected ElementState content()
1010         throws IOException
1011     {
1012         ElementState state;
1013 
1014         state = getElementState();
1015         if ( ! isDocumentState() ) {
1016             // Need to close CData section first
1017             if ( state.inCData && ! state.doCData ) {
1018                 printer.printText( "]]>" );
1019                 state.inCData = false;
1020             }
1021             // If this is the first content in the element,
1022             // change the state to not-empty and close the
1023             // opening element tag.
1024             if ( state.empty ) {
1025                 printer.printText( '>' );
1026                 state.empty = false;
1027             }
1028             // Except for one content type, all of them
1029             // are not last element. That one content
1030             // type will take care of itself.
1031             state.afterElement = false;
1032             // Except for one content type, all of them
1033             // are not last comment. That one content
1034             // type will take care of itself.
1035             state.afterComment = false;
1036         }
1037         return state;
1038     }
1039 
1040 
1041     /**
1042      * Called to print the text contents in the prevailing element format.
1043      * Since this method is capable of printing text as CDATA, it is used
1044      * for that purpose as well. White space handling is determined by the
1045      * current element state. In addition, the output format can dictate
1046      * whether the text is printed as CDATA or unescaped.
1047      *
1048      * @param text The text to print
1049      * @param unescaped True is should print unescaped
1050      * @throws IOException An I/O exception occured while
1051      *   serializing
1052      */
1053     protected void characters( String text )
1054         throws IOException
1055     {
1056         ElementState state;
1057 
1058         state = content();
1059         // Check if text should be print as CDATA section or unescaped
1060         // based on elements listed in the output format (the element
1061         // state) or whether we are inside a CDATA section or entity.
1062 
1063         if ( state.inCData || state.doCData ) {
1064             StringBuffer buffer;
1065             int          index;
1066             int          saveIndent;
1067 
1068             // Print a CDATA section. The text is not escaped, but ']]>'
1069             // appearing in the code must be identified and dealt with.
1070             // The contents of a text node is considered space preserving.
1071             buffer = new StringBuffer( text.length() );
1072             if ( ! state.inCData ) {
1073                 buffer.append( "<![CDATA[" );
1074                 state.inCData = true;
1075             }
1076             index = text.indexOf( "]]>" );
1077             while ( index >= 0 ) {
1078                 buffer.append( text.substring( 0, index + 2 ) ).append( "]]><![CDATA[" );
1079                 text = text.substring( index + 2 );
1080                 index = text.indexOf( "]]>" );
1081             }
1082             buffer.append( text );
1083             saveIndent = printer.getNextIndent();
1084             printer.setNextIndent( 0 );
1085             printText( buffer.toString(), true, true );
1086             printer.setNextIndent( saveIndent );
1087 
1088         } else {
1089 
1090             int saveIndent;
1091 
1092             if ( state.preserveSpace ) {
1093                 // If preserving space then hold of indentation so no
1094                 // excessive spaces are printed at line breaks, escape
1095                 // the text content without replacing spaces and print
1096                 // the text breaking only at line breaks.
1097                 saveIndent = printer.getNextIndent();
1098                 printer.setNextIndent( 0 );
1099                 printText( text, true, state.unescaped );
1100                 printer.setNextIndent( saveIndent );
1101             } else {
1102                 printText( text, false, state.unescaped );
1103             }
1104         }
1105     }
1106 
1107 
1108     /**
1109      * Returns the suitable entity reference for this character value,
1110      * or null if no such entity exists. Calling this method with <tt>'&amp;'</tt>
1111      * will return <tt>"&amp;amp;"</tt>.
1112      *
1113      * @param ch Character value
1114      * @return Character entity name, or null
1115      */
1116     protected abstract String getEntityRef( int ch );
1117 
1118 
1119     /**
1120      * Called to serializee the DOM element. The element is serialized based on
1121      * the serializer's method (XML, HTML, XHTML).
1122      *
1123      * @param elem The element to serialize
1124      * @throws IOException An I/O exception occured while
1125      *   serializing
1126      */
1127     protected abstract void serializeElement( Element elem )
1128         throws IOException;
1129 
1130 
1131     /**
1132      * Comments and PIs cannot be serialized before the root element,
1133      * because the root element serializes the document type, which
1134      * generally comes first. Instead such PIs and comments are
1135      * accumulated inside a vector and serialized by calling this
1136      * method. Will be called when the root element is serialized
1137      * and when the document finished serializing.
1138      *
1139      * @throws IOException An I/O exception occured while
1140      *   serializing
1141      */
1142     protected void serializePreRoot()
1143         throws IOException
1144     {
1145         int i;
1146 
1147         if ( preRoot != null ) {
1148             for ( i = 0 ; i < preRoot.size() ; ++i ) {
1149                 printText( (String) preRoot.elementAt( i ), true, true );
1150                 if ( indenting )
1151                 printer.breakLine();
1152             }
1153             preRoot.removeAllElements();
1154         }
1155     }
1156 
1157 
1158     //---------------------------------------------//
1159     // Text pretty printing and formatting methods //
1160     //---------------------------------------------//
1161 
1162 
1163     /**
1164      * Called to print additional text with whitespace handling.
1165      * If spaces are preserved, the text is printed as if by calling
1166      * {@link #printText(String)} with a call to {@link #breakLine}
1167      * for each new line. If spaces are not preserved, the text is
1168      * broken at space boundaries if longer than the line width;
1169      * Multiple spaces are printed as such, but spaces at beginning
1170      * of line are removed.
1171      *
1172      * @param text The text to print
1173      * @param preserveSpace Space preserving flag
1174      * @param unescaped Print unescaped
1175      */
1176     protected final void printText( char[] chars, int start, int length,
1177                                     boolean preserveSpace, boolean unescaped )
1178         throws IOException
1179     {
1180         char ch;
1181 
1182         if ( preserveSpace ) {
1183             // Preserving spaces: the text must print exactly as it is,
1184             // without breaking when spaces appear in the text and without
1185             // consolidating spaces. If a line terminator is used, a line
1186             // break will occur.
1187             while ( length-- > 0 ) {
1188                 ch = chars[ start ];
1189                 ++start;
1190                 if ( ch == '\n' || ch == '\r' || unescaped )
1191                     printer.printText( ch );
1192                 else
1193                     printEscaped( ch );
1194             }
1195         } else {
1196             // Not preserving spaces: print one part at a time, and
1197             // use spaces between parts to break them into different
1198             // lines. Spaces at beginning of line will be stripped
1199             // by printing mechanism. Line terminator is treated
1200             // no different than other text part.
1201             while ( length-- > 0 ) {
1202                 ch = chars[ start ];
1203                 ++start;
1204                 if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' )
1205                     printer.printSpace();
1206                 else if ( unescaped )
1207                     printer.printText( ch );
1208                 else
1209                     printEscaped( ch );
1210             }
1211         }
1212     }
1213 
1214 
1215     protected final void printText( String text, boolean preserveSpace, boolean unescaped )
1216         throws IOException
1217     {
1218         int index;
1219         char ch;
1220 
1221         if ( preserveSpace ) {
1222             // Preserving spaces: the text must print exactly as it is,
1223             // without breaking when spaces appear in the text and without
1224             // consolidating spaces. If a line terminator is used, a line
1225             // break will occur.
1226             for ( index = 0 ; index < text.length() ; ++index ) {
1227                 ch = text.charAt( index );
1228                 if ( ch == '\n' || ch == '\r' || unescaped )
1229                     printer.printText( ch );
1230                 else
1231                     printEscaped( ch );
1232             }
1233         } else {
1234             // Not preserving spaces: print one part at a time, and
1235             // use spaces between parts to break them into different
1236             // lines. Spaces at beginning of line will be stripped
1237             // by printing mechanism. Line terminator is treated
1238             // no different than other text part.
1239             for ( index = 0 ; index < text.length() ; ++index ) {
1240                 ch = text.charAt( index );
1241                 if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' )
1242                     printer.printSpace();
1243                 else if ( unescaped )
1244                     printer.printText( ch );
1245                 else
1246                     printEscaped( ch );
1247             }
1248         }
1249     }
1250 
1251 
1252     /**
1253      * Print a document type public or system identifier URL.
1254      * Encapsulates the URL in double quotes, escapes non-printing
1255      * characters and print it equivalent to {@link #printText}.
1256      *
1257      * @param url The document type url to print
1258      */
1259     protected void printDoctypeURL( String url )
1260         throws IOException
1261     {
1262         int                i;
1263 
1264         printer.printText( '"' );
1265         for( i = 0 ; i < url.length() ; ++i ) {
1266             if ( url.charAt( i ) == '"' ||  url.charAt( i ) < 0x20 || url.charAt( i ) > 0x7F ) {
1267                 printer.printText( '%' );
1268                 printer.printText( Integer.toHexString( url.charAt( i ) ) );
1269             } else
1270                 printer.printText( url.charAt( i ) );
1271         }
1272         printer.printText( '"' );
1273     }
1274 
1275 
1276     protected void printEscaped( int ch )
1277         throws IOException
1278     {
1279         String charRef;
1280 
1281         // If there is a suitable entity reference for this
1282         // character, print it. The list of available entity
1283         // references is almost but not identical between
1284         // XML and HTML.
1285         charRef = getEntityRef( ch );
1286         if ( charRef != null ) {
1287             printer.printText( '&' );
1288             printer.printText( charRef );
1289             printer.printText( ';' );
1290         } else if ( ( ch >= ' ' && encodingInfo.isPrintable(ch) && ch != 0xF7 ) ||
1291                     ch == '\n' || ch == '\r' || ch == '\t' ) {
1292             // If the character is not printable, print as character reference.
1293             // Non printables are below ASCII space but not tab or line
1294             // terminator, ASCII delete, or above a certain Unicode threshold.
1295             if (ch < 0x10000) {
1296                 printer.printText((char)ch );
1297             } else {
1298                 printer.printText((char)(((ch-0x10000)>>10)+0xd800));
1299                 printer.printText((char)(((ch-0x10000)&0x3ff)+0xdc00));
1300             }
1301 
1302         } else {
1303             printer.printText( "&#x" );
1304             printer.printText(Integer.toHexString(ch));
1305             printer.printText( ';' );
1306         }
1307     }
1308 
1309 
1310     /**
1311      * Escapes a string so it may be printed as text content or attribute
1312      * value. Non printable characters are escaped using character references.
1313      * Where the format specifies a deault entity reference, that reference
1314      * is used (e.g. <tt>&amp;lt;</tt>).
1315      *
1316      * @param source The string to escape
1317      */
1318     protected void printEscaped( String source )
1319         throws IOException
1320     {
1321         for ( int i = 0 ; i < source.length() ; ++i ) {
1322             int ch = source.charAt(i);
1323             if ((ch & 0xfc00) == 0xd800 && i+1 < source.length()) {
1324                 int lowch = source.charAt(i+1);
1325                 if ((lowch & 0xfc00) == 0xdc00) {
1326                     ch = 0x10000 + ((ch-0xd800)<<10) + lowch-0xdc00;
1327                     i++;
1328                 }
1329             }
1330             printEscaped(ch);
1331         }
1332     }
1333 
1334 
1335     //--------------------------------//
1336     // Element state handling methods //
1337     //--------------------------------//
1338 
1339 
1340     /**
1341      * Return the state of the current element.
1342      *
1343      * @return Current element state
1344      */
1345     protected ElementState getElementState()
1346     {
1347         return elementStates[ elementStateCount ];
1348     }
1349 
1350 
1351     /**
1352      * Enter a new element state for the specified element.
1353      * Tag name and space preserving is specified, element
1354      * state is initially empty.
1355      *
1356      * @return Current element state, or null
1357      */
1358     protected ElementState enterElementState( String namespaceURI, String localName,
1359                                               String rawName, boolean preserveSpace )
1360     {
1361         ElementState state;
1362 
1363         if ( elementStateCount + 1 == elementStates.length ) {
1364             ElementState[] newStates;
1365 
1366             // Need to create a larger array of states. This does not happen
1367             // often, unless the document is really deep.
1368             newStates = new ElementState[ elementStates.length + 10 ];
1369             for ( int i = 0 ; i < elementStates.length ; ++i )
1370                 newStates[ i ] = elementStates[ i ];
1371             for ( int i = elementStates.length ; i < newStates.length ; ++i )
1372                 newStates[ i ] = new ElementState();
1373             elementStates = newStates;
1374         }
1375 
1376         ++elementStateCount;
1377         state = elementStates[ elementStateCount ];
1378         state.namespaceURI = namespaceURI;
1379         state.localName = localName;
1380         state.rawName = rawName;
1381         state.preserveSpace = preserveSpace;
1382         state.empty = true;
1383         state.afterElement = false;
1384         state.afterComment = false;
1385         state.doCData = state.inCData = false;
1386         state.unescaped = false;
1387         state.prefixes = prefixes;
1388 
1389         prefixes = null;
1390         return state;
1391     }
1392 
1393 
1394     /**
1395      * Leave the current element state and return to the
1396      * state of the parent element. If this was the root
1397      * element, return to the state of the document.
1398      *
1399      * @return Previous element state
1400      */
1401     protected ElementState leaveElementState()
1402     {
1403         if ( elementStateCount > 0 ) {
1404             /*Corrected by David Blondeau (blondeau@intalio.com)*/
1405         prefixes = null;
1406         //_prefixes = _elementStates[ _elementStateCount ].prefixes;
1407             -- elementStateCount;
1408             return elementStates[ elementStateCount ];
1409         } else
1410             throw new IllegalStateException( "Internal error: element state is zero" );
1411     }
1412 
1413 
1414     /**
1415      * Returns true if in the state of the document.
1416      * Returns true before entering any element and after
1417      * leaving the root element.
1418      *
1419      * @return True if in the state of the document
1420      */
1421     protected boolean isDocumentState()
1422     {
1423         return elementStateCount == 0;
1424     }
1425 
1426 
1427     /**
1428      * Returns the namespace prefix for the specified URI.
1429      * If the URI has been mapped to a prefix, returns the
1430      * prefix, otherwise returns null.
1431      *
1432      * @param namespaceURI The namespace URI
1433      * @return The namespace prefix if known, or null
1434      */
1435     protected String getPrefix( String namespaceURI )
1436     {
1437         String    prefix;
1438 
1439         if ( prefixes != null ) {
1440             prefix = (String) prefixes.get( namespaceURI );
1441             if ( prefix != null )
1442                 return prefix;
1443         }
1444         if ( elementStateCount == 0 )
1445             return null;
1446         else {
1447             for ( int i = elementStateCount ; i > 0 ; --i ) {
1448                 if ( elementStates[ i ].prefixes != null ) {
1449                     prefix = (String) elementStates[ i ].prefixes.get( namespaceURI );
1450                     if ( prefix != null )
1451                         return prefix;
1452                 }
1453             }
1454         }
1455         return null;
1456     }
1457 
1458 
1459 }