001    /**
002     *
003     * Copyright 2006 The Apache Software Foundation
004     *
005     *  Licensed under the Apache License, Version 2.0 (the "License");
006     *  you may not use this file except in compliance with the License.
007     *  You may obtain a copy of the License at
008     *
009     *     http://www.apache.org/licenses/LICENSE-2.0
010     *
011     *  Unless required by applicable law or agreed to in writing, software
012     *  distributed under the License is distributed on an "AS IS" BASIS,
013     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     *  See the License for the specific language governing permissions and
015     *  limitations under the License.
016     */
017    
018    /*
019     * This code has been borrowed from the Apache Xerces project. We're copying the code to
020     * keep from adding a dependency on Xerces in the Geronimo kernel.
021     */
022    
023    package org.apache.geronimo.system.configuration;
024    
025    import java.io.Writer;
026    import java.io.OutputStream;
027    import java.io.IOException;
028    import java.util.Vector;
029    import java.util.Hashtable;
030    
031    import org.w3c.dom.Document;
032    import org.w3c.dom.DocumentFragment;
033    import org.w3c.dom.DocumentType;
034    import org.w3c.dom.Element;
035    import org.w3c.dom.Node;
036    import org.xml.sax.Locator;
037    import org.xml.sax.SAXException;
038    
039    /**
040     * Base class for a serializer supporting both DOM and SAX pretty
041     * serializing of XML/HTML/XHTML documents. Derived classes perform
042     * the method-specific serializing, this class provides the common
043     * serializing mechanisms.
044     * <p>
045     * The serializer must be initialized with the proper writer and
046     * output format before it can be used by calling {@link #init}.
047     * The serializer can be reused any number of times, but cannot
048     * be used concurrently by two threads.
049     * <p>
050     * If an output stream is used, the encoding is taken from the
051     * output format (defaults to <tt>UTF-8</tt>). If a writer is
052     * used, make sure the writer uses the same encoding (if applies)
053     * as specified in the output format.
054     * <p>
055     * The serializer supports both DOM and SAX. DOM serializing is done
056     * by calling {@link #serialize} and SAX serializing is done by firing
057     * SAX events and using the serializer as a document handler.
058     * This also applies to derived classes.
059     * <p>
060     * If an I/O exception occurs while serializing, the serializer
061     * will not throw an exception directly, but only throw it
062     * at the end of serializing (either DOM or SAX's {@link
063     * org.xml.sax.DocumentHandler#endDocument}).
064     * <p>
065     * For elements that are not specified as whitespace preserving,
066     * the serializer will potentially break long text lines at space
067     * boundaries, indent lines, and serialize elements on separate
068     * lines. Line terminators will be regarded as spaces, and
069     * spaces at beginning of line will be stripped.
070     * <p>
071     * When indenting, the serializer is capable of detecting seemingly
072     * element content, and serializing these elements indented on separate
073     * lines. An element is serialized indented when it is the first or
074     * last child of an element, or immediate following or preceding
075     * another element.
076     *
077     *
078     * @version $Revision: 410741 $ $Date: 2006-05-31 21:35:48 -0700 (Wed, 31 May 2006) $
079     * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
080     * @see Serializer
081     * @see DOMSerializer
082     */
083    public abstract class BaseMarkupSerializer
084    {
085    
086        private EncodingInfo encodingInfo; // Assigned from format.getEncodingInfo() in prepare()
087    
088        /**
089         * Holds array of all element states that have been entered.
090         * The array is automatically resized. When leaving an element,
091         * its state is not removed but reused when later returning
092         * to the same nesting level.
093         */
094        private ElementState[]  elementStates;
095    
096        /**
097         * The index of the next state to place in the array,
098         * or one plus the index of the current state. When zero,
099         * we are in no state.
100         */
101        private int             elementStateCount;
102    
103        /**
104         * Vector holding comments and PIs that come before the root
105         * element (even after it), see {@link #serializePreRoot}.
106         */
107        private Vector          preRoot;
108    
109        /**
110         * If the document has been started (header serialized), this
111         * flag is set to true so it's not started twice.
112         */
113        protected boolean       started;
114    
115        /**
116         * True if the serializer has been prepared. This flag is set
117         * to false when the serializer is reset prior to using it,
118         * and to true after it has been prepared for usage.
119         */
120        private boolean         prepared;
121    
122        /**
123         * Association between namespace URIs (keys) and prefixes (values).
124         * Accumulated here prior to starting an element and placing this
125         * list in the element state.
126         */
127        protected Hashtable     prefixes;
128    
129        /**
130         * The public identifier of the document type, if known.
131         */
132        protected String        docTypePublicId;
133    
134    
135        /**
136         * The system identifier of the document type, if known.
137         */
138        protected String        docTypeSystemId;
139    
140    
141        /**
142         * The output format associated with this serializer. This will never
143         * be a null reference. If no format was passed to the constructor,
144         * the default one for this document type will be used. The format
145         * object is never changed by the serializer.
146         */
147        protected OutputFormat   format;
148    
149    
150        /**
151         * The printer used for printing text parts.
152         */
153        protected Printer       printer;
154    
155    
156        /**
157         * True if the printer is an indenting printer; set in prepare().
158         */
159        protected boolean       indenting;
160    
161    
162        /**
163         * The underlying writer.
164         */
165        private Writer          writer;
166    
167    
168        /**
169         * The output stream.
170         */
171        private OutputStream    output;
172    
173    
174        //--------------------------------//
175        // Constructor and initialization //
176        //--------------------------------//
177    
178    
179        /**
180         * Protected constructor can only be used by derived class.
181         * Must initialize the serializer before serializing any document,
182         * see {@link #init}.
183         */
184        protected BaseMarkupSerializer( OutputFormat format )
185        {
186            int i;
187    
188            elementStates = new ElementState[ 10 ];
189            for ( i = 0 ; i < elementStates.length ; ++i )
190                elementStates[ i ] = new ElementState();
191            this.format = format;
192        }
193    
194    
195        public void setOutputByteStream( OutputStream output )
196        {
197            if ( output == null )
198                throw new NullPointerException( "SER001 Argument 'output' is null." );
199            this.output = output;
200            writer = null;
201            reset();
202        }
203    
204    
205        public void setOutputCharStream( Writer writer )
206        {
207            if ( writer == null )
208                throw new NullPointerException( "SER001 Argument 'writer' is null." );
209            this.writer = writer;
210            output = null;
211            reset();
212        }
213    
214    
215        public void setOutputFormat( OutputFormat format )
216        {
217            if ( format == null )
218                throw new NullPointerException( "SER001 Argument 'format' is null." );
219            this.format = format;
220            reset();
221        }
222    
223    
224        public boolean reset()
225        {
226            if ( elementStateCount > 1 )
227                throw new IllegalStateException( "Serializer reset in the middle of serialization" );
228            prepared = false;
229            return true;
230        }
231    
232    
233        protected void prepare()
234            throws IOException
235        {
236            if ( prepared )
237                return;
238    
239            if ( writer == null && output == null )
240                throw new IOException( "SER002 No writer supplied for serializer" );
241            // If the output stream has been set, use it to construct
242            // the writer. It is possible that the serializer has been
243            // reused with the same output stream and different encoding.
244    
245            encodingInfo = format.getEncodingInfo();
246    
247            if ( output != null ) {
248                writer = encodingInfo.getWriter(output);
249            }
250    
251            if ( format.getIndenting() ) {
252                indenting = true;
253                printer = new IndentPrinter( writer, format );
254            } else {
255                indenting = false;
256                printer = new Printer( writer, format );
257            }
258    
259            ElementState state;
260    
261            elementStateCount = 0;
262            state = elementStates[ 0 ];
263            state.namespaceURI = null;
264            state.localName = null;
265            state.rawName = null;
266            state.preserveSpace = format.getPreserveSpace();
267            state.empty = true;
268            state.afterElement = false;
269            state.afterComment = false;
270            state.doCData = state.inCData = false;
271            state.prefixes = null;
272    
273            docTypePublicId = format.getDoctypePublic();
274            docTypeSystemId = format.getDoctypeSystem();
275            started = false;
276            prepared = true;
277        }
278    
279    
280    
281        //----------------------------------//
282        // DOM document serializing methods //
283        //----------------------------------//
284    
285    
286        /**
287         * Serializes the DOM element using the previously specified
288         * writer and output format. Throws an exception only if
289         * an I/O exception occured while serializing.
290         *
291         * @param elem The element to serialize
292         * @throws IOException An I/O exception occured while
293         *   serializing
294         */
295        public void serialize( Element elem )
296            throws IOException
297        {
298            prepare();
299            serializeNode( elem );
300            printer.flush();
301            if ( printer.getException() != null )
302                throw printer.getException();
303        }
304    
305    
306        /**
307         * Serializes the DOM document fragmnt using the previously specified
308         * writer and output format. Throws an exception only if
309         * an I/O exception occured while serializing.
310         *
311         * @param elem The element to serialize
312         * @throws IOException An I/O exception occured while
313         *   serializing
314         */
315        public void serialize( DocumentFragment frag )
316            throws IOException
317        {
318            prepare();
319            serializeNode( frag );
320            printer.flush();
321            if ( printer.getException() != null )
322                throw printer.getException();
323        }
324    
325    
326        /**
327         * Serializes the DOM document using the previously specified
328         * writer and output format. Throws an exception only if
329         * an I/O exception occured while serializing.
330         *
331         * @param doc The document to serialize
332         * @throws IOException An I/O exception occured while
333         *   serializing
334         */
335        public void serialize( Document doc )
336            throws IOException
337        {
338            prepare();
339            serializeNode( doc );
340            serializePreRoot();
341            printer.flush();
342            if ( printer.getException() != null )
343                throw printer.getException();
344        }
345    
346    
347        //------------------------------------------//
348        // SAX document handler serializing methods //
349        //------------------------------------------//
350    
351    
352        public void startDocument()
353            throws SAXException
354        {
355            try {
356                prepare();
357            } catch ( IOException except ) {
358                throw new SAXException( except.toString() );
359            }
360            // Nothing to do here. All the magic happens in startDocument(String)
361        }
362        
363        
364        public void characters( char[] chars, int start, int length )
365            throws SAXException
366        {
367            ElementState state;
368    
369            try {
370            state = content();
371    
372            // Check if text should be print as CDATA section or unescaped
373            // based on elements listed in the output format (the element
374            // state) or whether we are inside a CDATA section or entity.
375    
376            if ( state.inCData || state.doCData ) {
377                int          saveIndent;
378    
379                // Print a CDATA section. The text is not escaped, but ']]>'
380                // appearing in the code must be identified and dealt with.
381                // The contents of a text node is considered space preserving.
382                if ( ! state.inCData ) {
383                    printer.printText( "<![CDATA[" );
384                    state.inCData = true;
385                }
386                saveIndent = printer.getNextIndent();
387                printer.setNextIndent( 0 );
388                for ( int index = 0 ; index < length ; ++index ) {
389                    if ( index + 2 < length && chars[ index ] == ']' &&
390                         chars[ index + 1 ] == ']' && chars[ index + 2 ] == '>' ) {
391    
392                        printText( chars, start, index + 2, true, true );
393                        printer.printText( "]]><![CDATA[" );
394                        start += index + 2;
395                        length -= index + 2;
396                        index = 0;
397                    }
398                }
399                if ( length > 0 )
400                    printText( chars, start, length, true, true );
401                printer.setNextIndent( saveIndent );
402    
403            } else {
404    
405                int saveIndent;
406    
407                if ( state.preserveSpace ) {
408                    // If preserving space then hold of indentation so no
409                    // excessive spaces are printed at line breaks, escape
410                    // the text content without replacing spaces and print
411                    // the text breaking only at line breaks.
412                    saveIndent = printer.getNextIndent();
413                    printer.setNextIndent( 0 );
414                    printText( chars, start, length, true, state.unescaped );
415                    printer.setNextIndent( saveIndent );
416                } else {
417                    printText( chars, start, length, false, state.unescaped );
418                }
419            }
420            } catch ( IOException except ) {
421                throw new SAXException( except );
422            }
423        }
424    
425    
426        public void ignorableWhitespace( char[] chars, int start, int length )
427            throws SAXException
428        {
429            int i;
430    
431            try {
432            content();
433    
434            // Print ignorable whitespaces only when indenting, after
435            // all they are indentation. Cancel the indentation to
436            // not indent twice.
437            if ( indenting ) {
438                printer.setThisIndent( 0 );
439                for ( i = start ; length-- > 0 ; ++i )
440                    printer.printText( chars[ i ] );
441            }
442            } catch ( IOException except ) {
443                throw new SAXException( except );
444            }
445        }
446    
447    
448        public final void processingInstruction( String target, String code )
449            throws SAXException
450        {
451            try {
452                processingInstructionIO( target, code );
453            } catch ( IOException except ) {
454            throw new SAXException( except );
455            }
456        }
457    
458        public void processingInstructionIO( String target, String code )
459            throws IOException
460        {
461            int          index;
462            StringBuffer buffer;
463            ElementState state;
464    
465            state = content();
466            buffer = new StringBuffer( 40 );
467    
468            // Create the processing instruction textual representation.
469            // Make sure we don't have '?>' inside either target or code.
470            index = target.indexOf( "?>" );
471            if ( index >= 0 )
472                buffer.append( "<?" ).append( target.substring( 0, index ) );
473            else
474                buffer.append( "<?" ).append( target );
475            if ( code != null ) {
476                buffer.append( ' ' );
477                index = code.indexOf( "?>" );
478                if ( index >= 0 )
479                    buffer.append( code.substring( 0, index ) );
480                else
481                    buffer.append( code );
482            }
483            buffer.append( "?>" );
484    
485            // If before the root element (or after it), do not print
486            // the PI directly but place it in the pre-root vector.
487            if ( isDocumentState() ) {
488                if ( preRoot == null )
489                    preRoot = new Vector();
490                preRoot.addElement( buffer.toString() );
491            } else {
492                printer.indent();
493                printText( buffer.toString(), true, true );
494                printer.unindent();
495                if ( indenting )
496                state.afterElement = true;
497            }
498        }
499    
500    
501        public void comment( char[] chars, int start, int length )
502            throws SAXException
503        {
504            try {
505            comment( new String( chars, start, length ) );
506            } catch ( IOException except ) {
507                throw new SAXException( except );
508        }
509        }
510    
511    
512        public void comment( String text )
513            throws IOException
514        {
515            StringBuffer buffer;
516            int          index;
517            ElementState state;
518            
519            if ( format.getOmitComments() )
520                return;
521    
522            state  = content();
523            buffer = new StringBuffer( 40 );
524            // Create the processing comment textual representation.
525            // Make sure we don't have '-->' inside the comment.
526            index = text.indexOf( "-->" );
527            if ( index >= 0 )
528                buffer.append( "<!--" ).append( text.substring( 0, index ) ).append( "-->" );
529            else
530                buffer.append( "<!--" ).append( text ).append( "-->" );
531    
532            // If before the root element (or after it), do not print
533            // the comment directly but place it in the pre-root vector.
534            if ( isDocumentState() ) {
535                if ( preRoot == null )
536                    preRoot = new Vector();
537                preRoot.addElement( buffer.toString() );
538            } else {
539                // Indent this element on a new line if the first
540                // content of the parent element or immediately
541                // following an element.
542                if ( indenting && ! state.preserveSpace)
543                    printer.breakLine();
544                            printer.indent();
545                printText( buffer.toString(), true, true );
546                            printer.unindent();
547                if ( indenting )
548                    state.afterElement = true;
549            }
550                    state.afterComment = true;
551                    state.afterElement = false;
552        }
553    
554    
555        public void startCDATA()
556        {
557            ElementState state;
558    
559            state = getElementState();
560            state.doCData = true;
561        }
562    
563    
564        public void endCDATA()
565        {
566            ElementState state;
567    
568            state = getElementState();
569            state.doCData = false;
570        }
571    
572    
573        public void startNonEscaping()
574        {
575            ElementState state;
576    
577            state = getElementState();
578            state.unescaped = true;
579        }
580    
581    
582        public void endNonEscaping()
583        {
584            ElementState state;
585    
586            state = getElementState();
587            state.unescaped = false;
588        }
589    
590    
591        public void startPreserving()
592        {
593            ElementState state;
594    
595            state = getElementState();
596            state.preserveSpace = true;
597        }
598    
599    
600        public void endPreserving()
601        {
602            ElementState state;
603    
604            state = getElementState();
605            state.preserveSpace = false;
606        }
607    
608    
609        /**
610         * Called at the end of the document to wrap it up.
611         * Will flush the output stream and throw an exception
612         * if any I/O error occured while serializing.
613         *
614         * @throws SAXException An I/O exception occured during
615         *  serializing
616         */
617        public void endDocument()
618            throws SAXException
619        {
620            try {
621            // Print all the elements accumulated outside of
622            // the root element.
623            serializePreRoot();
624            // Flush the output, this is necessary for buffered output.
625            printer.flush();
626            } catch ( IOException except ) {
627                throw new SAXException( except );
628        }
629        }
630    
631    
632        public void startEntity( String name )
633        {
634            // ???
635        }
636    
637    
638        public void endEntity( String name )
639        {
640            // ???
641        }
642    
643    
644        public void setDocumentLocator( Locator locator )
645        {
646            // Nothing to do
647        }
648    
649    
650        //-----------------------------------------//
651        // SAX content handler serializing methods //
652        //-----------------------------------------//
653    
654    
655        public void skippedEntity ( String name )
656            throws SAXException
657        {
658            try {
659            endCDATA();
660            content();
661            printer.printText( '&' );
662            printer.printText( name );
663            printer.printText( ';' );
664            } catch ( IOException except ) {
665                throw new SAXException( except );
666        }
667        }
668    
669    
670        public void startPrefixMapping( String prefix, String uri )
671            throws SAXException
672        {
673            if ( prefixes == null )
674                prefixes = new Hashtable();
675            prefixes.put( uri, prefix == null ? "" : prefix );
676        }
677    
678    
679        public void endPrefixMapping( String prefix )
680            throws SAXException
681        {
682        }
683    
684    
685        //------------------------------------------//
686        // SAX DTD/Decl handler serializing methods //
687        //------------------------------------------//
688    
689    
690        public final void startDTD( String name, String publicId, String systemId )
691            throws SAXException
692        {
693            try {
694            printer.enterDTD();
695            docTypePublicId = publicId;
696            docTypeSystemId = systemId;
697            } catch ( IOException except ) {
698                throw new SAXException( except );
699            }
700        }
701    
702    
703        public void endDTD()
704        {
705            // Nothing to do here, all the magic occurs in startDocument(String).
706        }
707    
708    
709        public void elementDecl( String name, String model )
710            throws SAXException
711        {
712            try {
713            printer.enterDTD();
714            printer.printText( "<!ELEMENT " );
715            printer.printText( name );
716            printer.printText( ' ' );
717            printer.printText( model );
718            printer.printText( '>' );
719            if ( indenting )
720                printer.breakLine();
721            } catch ( IOException except ) {
722                throw new SAXException( except );
723            }
724        }
725    
726    
727        public void attributeDecl( String eName, String aName, String type,
728                                   String valueDefault, String value )
729            throws SAXException
730        {
731            try {
732            printer.enterDTD();
733            printer.printText( "<!ATTLIST " );
734            printer.printText( eName );
735            printer.printText( ' ' );
736            printer.printText( aName );
737            printer.printText( ' ' );
738            printer.printText( type );
739            if ( valueDefault != null ) {
740                printer.printText( ' ' );
741                printer.printText( valueDefault );
742            }
743            if ( value != null ) {
744                printer.printText( " \"" );
745                printEscaped( value );
746                printer.printText( '"' );
747            }
748            printer.printText( '>' );
749            if ( indenting )
750                printer.breakLine();
751            } catch ( IOException except ) {
752                throw new SAXException( except );
753        }
754        }
755    
756    
757        public void internalEntityDecl( String name, String value )
758            throws SAXException
759        {
760            try {
761            printer.enterDTD();
762            printer.printText( "<!ENTITY " );
763            printer.printText( name );
764            printer.printText( " \"" );
765            printEscaped( value );
766            printer.printText( "\">" );
767            if ( indenting )
768                printer.breakLine();
769            } catch ( IOException except ) {
770                throw new SAXException( except );
771            }
772        }
773    
774    
775        public void externalEntityDecl( String name, String publicId, String systemId )
776            throws SAXException
777        {
778            try {
779            printer.enterDTD();
780            unparsedEntityDecl( name, publicId, systemId, null );
781            } catch ( IOException except ) {
782                throw new SAXException( except );
783            }
784        }
785    
786    
787        public void unparsedEntityDecl( String name, String publicId,
788                                        String systemId, String notationName )
789            throws SAXException
790        {
791            try {
792            printer.enterDTD();
793            if ( publicId == null ) {
794                printer.printText( "<!ENTITY " );
795                printer.printText( name );
796                printer.printText( " SYSTEM " );
797                printDoctypeURL( systemId );
798            } else {
799                printer.printText( "<!ENTITY " );
800                printer.printText( name );
801                printer.printText( " PUBLIC " );
802                printDoctypeURL( publicId );
803                printer.printText( ' ' );
804                printDoctypeURL( systemId );
805            }
806            if ( notationName != null ) {
807                printer.printText( " NDATA " );
808                printer.printText( notationName );
809            }
810            printer.printText( '>' );
811            if ( indenting )
812                printer.breakLine();
813            } catch ( IOException except ) {
814                throw new SAXException( except );
815        }
816        }
817    
818    
819        public void notationDecl( String name, String publicId, String systemId )
820            throws SAXException
821        {
822            try {
823            printer.enterDTD();
824            if ( publicId != null ) {
825                printer.printText( "<!NOTATION " );
826                printer.printText( name );
827                printer.printText( " PUBLIC " );
828                printDoctypeURL( publicId );
829                if ( systemId != null ) {
830                    printer.printText( ' ' );
831                    printDoctypeURL( systemId );
832                }
833            } else {
834                printer.printText( "<!NOTATION " );
835                printer.printText( name );
836                printer.printText( " SYSTEM " );
837                printDoctypeURL( systemId );
838            }
839            printer.printText( '>' );
840            if ( indenting )
841                printer.breakLine();
842            } catch ( IOException except ) {
843                throw new SAXException( except );
844            }
845        }
846    
847    
848        //------------------------------------------//
849        // Generic node serializing methods         //
850        //------------------------------------------//
851    
852    
853        /**
854         * Serialize the DOM node. This method is shared across XML, HTML and XHTML
855         * serializers and the differences are masked out in a separate {@link
856         * #serializeElement}.
857         *
858         * @param node The node to serialize
859         * @see #serializeElement
860         * @throws IOException An I/O exception occured while
861         *   serializing
862         */
863        protected void serializeNode( Node node )
864            throws IOException
865        {
866            // Based on the node type call the suitable SAX handler.
867            // Only comments entities and documents which are not
868            // handled by SAX are serialized directly.
869            switch ( node.getNodeType() ) {
870            case Node.TEXT_NODE : {
871                String text;
872    
873                text = node.getNodeValue();
874                if ( text != null )
875                    if ( !indenting || getElementState().preserveSpace
876                         || (text.replace('\n',' ').trim().length() != 0))
877                        characters( text );
878                break;
879            }
880    
881            case Node.CDATA_SECTION_NODE : {
882                String text;
883    
884                text = node.getNodeValue();
885                if ( text != null ) {
886                    startCDATA();
887                    characters( text );
888                    endCDATA();
889                }
890                break;
891            }
892    
893            case Node.COMMENT_NODE : {
894                String text;
895    
896                if ( ! format.getOmitComments() ) {
897                    text = node.getNodeValue();
898                    if ( text != null )
899                        comment( text );
900                }
901                break;
902            }
903    
904            case Node.ENTITY_REFERENCE_NODE : {
905                Node         child;
906    
907                endCDATA();
908                content();
909                child = node.getFirstChild();
910                while ( child != null ) {
911                    serializeNode( child );
912                    child = child.getNextSibling();
913                }
914                break;
915            }
916    
917            case Node.PROCESSING_INSTRUCTION_NODE :
918                processingInstructionIO( node.getNodeName(), node.getNodeValue() );
919                break;
920    
921            case Node.ELEMENT_NODE :
922                serializeElement( (Element) node );
923                break;
924    
925            case Node.DOCUMENT_NODE : {
926                DocumentType      docType;
927    
928                // If there is a document type, use the SAX events to
929                // serialize it.
930                docType = ( (Document) node ).getDoctype();
931                if (docType != null) {
932                    // DOM Level 2 (or higher)
933                    // TODO: result of the following call was assigned to a local variable that was never
934                    // read. Can the call be deleted?
935                    ( (Document) node ).getImplementation();
936                    try {
937                        String internal;
938    
939                        printer.enterDTD();
940                        docTypePublicId = docType.getPublicId();
941                        docTypeSystemId = docType.getSystemId();
942                        internal = docType.getInternalSubset();
943                        if ( internal != null && internal.length() > 0 )
944                            printer.printText( internal );
945                        endDTD();
946                    }
947                    // DOM Level 1 -- does implementation have methods?
948                    catch (NoSuchMethodError nsme) {
949                        Class docTypeClass = docType.getClass();
950    
951                        String docTypePublicId = null;
952                        String docTypeSystemId = null;
953                        try {
954                            java.lang.reflect.Method getPublicId = docTypeClass.getMethod("getPublicId", null);
955                            if (getPublicId.getReturnType().equals(String.class)) {
956                                docTypePublicId = (String)getPublicId.invoke(docType, null);
957                            }
958                        }
959                        catch (Exception e) {
960                            // ignore
961                        }
962                        try {
963                            java.lang.reflect.Method getSystemId = docTypeClass.getMethod("getSystemId", null);
964                            if (getSystemId.getReturnType().equals(String.class)) {
965                                docTypeSystemId = (String)getSystemId.invoke(docType, null);
966                            }
967                        }
968                        catch (Exception e) {
969                            // ignore
970                        }
971                        this.printer.enterDTD();
972                        this.docTypePublicId = docTypePublicId;
973                        this.docTypeSystemId = docTypeSystemId;
974                        endDTD();
975                    }
976                }
977                // !! Fall through
978            }
979            case Node.DOCUMENT_FRAGMENT_NODE : {
980                Node         child;
981    
982                // By definition this will happen if the node is a document,
983                // document fragment, etc. Just serialize its contents. It will
984                // work well for other nodes that we do not know how to serialize.
985                child = node.getFirstChild();
986                while ( child != null ) {
987                    serializeNode( child );
988                    child = child.getNextSibling();
989                }
990                break;
991            }
992    
993            default:
994                break;
995            }
996        }
997    
998    
999        /**
1000         * Must be called by a method about to print any type of content.
1001         * If the element was just opened, the opening tag is closed and
1002         * will be matched to a closing tag. Returns the current element
1003         * state with <tt>empty</tt> and <tt>afterElement</tt> set to false.
1004         *
1005         * @return The current element state
1006         * @throws IOException An I/O exception occured while
1007         *   serializing
1008         */
1009        protected ElementState content()
1010            throws IOException
1011        {
1012            ElementState state;
1013    
1014            state = getElementState();
1015            if ( ! isDocumentState() ) {
1016                // Need to close CData section first
1017                if ( state.inCData && ! state.doCData ) {
1018                    printer.printText( "]]>" );
1019                    state.inCData = false;
1020                }
1021                // If this is the first content in the element,
1022                // change the state to not-empty and close the
1023                // opening element tag.
1024                if ( state.empty ) {
1025                    printer.printText( '>' );
1026                    state.empty = false;
1027                }
1028                // Except for one content type, all of them
1029                // are not last element. That one content
1030                // type will take care of itself.
1031                state.afterElement = false;
1032                // Except for one content type, all of them
1033                // are not last comment. That one content
1034                // type will take care of itself.
1035                state.afterComment = false;
1036            }
1037            return state;
1038        }
1039    
1040    
1041        /**
1042         * Called to print the text contents in the prevailing element format.
1043         * Since this method is capable of printing text as CDATA, it is used
1044         * for that purpose as well. White space handling is determined by the
1045         * current element state. In addition, the output format can dictate
1046         * whether the text is printed as CDATA or unescaped.
1047         *
1048         * @param text The text to print
1049         * @param unescaped True is should print unescaped
1050         * @throws IOException An I/O exception occured while
1051         *   serializing
1052         */
1053        protected void characters( String text )
1054            throws IOException
1055        {
1056            ElementState state;
1057    
1058            state = content();
1059            // Check if text should be print as CDATA section or unescaped
1060            // based on elements listed in the output format (the element
1061            // state) or whether we are inside a CDATA section or entity.
1062    
1063            if ( state.inCData || state.doCData ) {
1064                StringBuffer buffer;
1065                int          index;
1066                int          saveIndent;
1067    
1068                // Print a CDATA section. The text is not escaped, but ']]>'
1069                // appearing in the code must be identified and dealt with.
1070                // The contents of a text node is considered space preserving.
1071                buffer = new StringBuffer( text.length() );
1072                if ( ! state.inCData ) {
1073                    buffer.append( "<![CDATA[" );
1074                    state.inCData = true;
1075                }
1076                index = text.indexOf( "]]>" );
1077                while ( index >= 0 ) {
1078                    buffer.append( text.substring( 0, index + 2 ) ).append( "]]><![CDATA[" );
1079                    text = text.substring( index + 2 );
1080                    index = text.indexOf( "]]>" );
1081                }
1082                buffer.append( text );
1083                saveIndent = printer.getNextIndent();
1084                printer.setNextIndent( 0 );
1085                printText( buffer.toString(), true, true );
1086                printer.setNextIndent( saveIndent );
1087    
1088            } else {
1089    
1090                int saveIndent;
1091    
1092                if ( state.preserveSpace ) {
1093                    // If preserving space then hold of indentation so no
1094                    // excessive spaces are printed at line breaks, escape
1095                    // the text content without replacing spaces and print
1096                    // the text breaking only at line breaks.
1097                    saveIndent = printer.getNextIndent();
1098                    printer.setNextIndent( 0 );
1099                    printText( text, true, state.unescaped );
1100                    printer.setNextIndent( saveIndent );
1101                } else {
1102                    printText( text, false, state.unescaped );
1103                }
1104            }
1105        }
1106    
1107    
    /**
     * Returns the name of the entity reference suitable for this character
     * value, or null if no such entity exists. The value is used by
     * {@link #printEscaped(int)}, which prints it between <tt>'&amp;'</tt>
     * and <tt>';'</tt>; the returned string is therefore expected to be the
     * bare entity name without those delimiters (presumably <tt>"amp"</tt>
     * for <tt>'&amp;'</tt> -- implementations live in subclasses, confirm
     * there).
     *
     * @param ch Character value
     * @return Character entity name, or null
     */
    protected abstract String getEntityRef( int ch );
1117    
1118    
    /**
     * Called to serialize the DOM element. The element is serialized based on
     * the serializer's method (XML, HTML, XHTML). {@link #serializeNode}
     * invokes this method for every element node it encounters.
     *
     * @param elem The element to serialize
     * @throws IOException An I/O exception occurred while
     *   serializing
     */
    protected abstract void serializeElement( Element elem )
        throws IOException;
1129    
1130    
1131        /**
1132         * Comments and PIs cannot be serialized before the root element,
1133         * because the root element serializes the document type, which
1134         * generally comes first. Instead such PIs and comments are
1135         * accumulated inside a vector and serialized by calling this
1136         * method. Will be called when the root element is serialized
1137         * and when the document finished serializing.
1138         *
1139         * @throws IOException An I/O exception occured while
1140         *   serializing
1141         */
1142        protected void serializePreRoot()
1143            throws IOException
1144        {
1145            int i;
1146    
1147            if ( preRoot != null ) {
1148                for ( i = 0 ; i < preRoot.size() ; ++i ) {
1149                    printText( (String) preRoot.elementAt( i ), true, true );
1150                    if ( indenting )
1151                    printer.breakLine();
1152                }
1153                preRoot.removeAllElements();
1154            }
1155        }
1156    
1157    
1158        //---------------------------------------------//
1159        // Text pretty printing and formatting methods //
1160        //---------------------------------------------//
1161    
1162    
1163        /**
1164         * Called to print additional text with whitespace handling.
1165         * If spaces are preserved, the text is printed as if by calling
1166         * {@link #printText(String)} with a call to {@link #breakLine}
1167         * for each new line. If spaces are not preserved, the text is
1168         * broken at space boundaries if longer than the line width;
1169         * Multiple spaces are printed as such, but spaces at beginning
1170         * of line are removed.
1171         *
1172         * @param text The text to print
1173         * @param preserveSpace Space preserving flag
1174         * @param unescaped Print unescaped
1175         */
1176        protected final void printText( char[] chars, int start, int length,
1177                                        boolean preserveSpace, boolean unescaped )
1178            throws IOException
1179        {
1180            char ch;
1181    
1182            if ( preserveSpace ) {
1183                // Preserving spaces: the text must print exactly as it is,
1184                // without breaking when spaces appear in the text and without
1185                // consolidating spaces. If a line terminator is used, a line
1186                // break will occur.
1187                while ( length-- > 0 ) {
1188                    ch = chars[ start ];
1189                    ++start;
1190                    if ( ch == '\n' || ch == '\r' || unescaped )
1191                        printer.printText( ch );
1192                    else
1193                        printEscaped( ch );
1194                }
1195            } else {
1196                // Not preserving spaces: print one part at a time, and
1197                // use spaces between parts to break them into different
1198                // lines. Spaces at beginning of line will be stripped
1199                // by printing mechanism. Line terminator is treated
1200                // no different than other text part.
1201                while ( length-- > 0 ) {
1202                    ch = chars[ start ];
1203                    ++start;
1204                    if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' )
1205                        printer.printSpace();
1206                    else if ( unescaped )
1207                        printer.printText( ch );
1208                    else
1209                        printEscaped( ch );
1210                }
1211            }
1212        }
1213    
1214    
1215        protected final void printText( String text, boolean preserveSpace, boolean unescaped )
1216            throws IOException
1217        {
1218            int index;
1219            char ch;
1220    
1221            if ( preserveSpace ) {
1222                // Preserving spaces: the text must print exactly as it is,
1223                // without breaking when spaces appear in the text and without
1224                // consolidating spaces. If a line terminator is used, a line
1225                // break will occur.
1226                for ( index = 0 ; index < text.length() ; ++index ) {
1227                    ch = text.charAt( index );
1228                    if ( ch == '\n' || ch == '\r' || unescaped )
1229                        printer.printText( ch );
1230                    else
1231                        printEscaped( ch );
1232                }
1233            } else {
1234                // Not preserving spaces: print one part at a time, and
1235                // use spaces between parts to break them into different
1236                // lines. Spaces at beginning of line will be stripped
1237                // by printing mechanism. Line terminator is treated
1238                // no different than other text part.
1239                for ( index = 0 ; index < text.length() ; ++index ) {
1240                    ch = text.charAt( index );
1241                    if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' )
1242                        printer.printSpace();
1243                    else if ( unescaped )
1244                        printer.printText( ch );
1245                    else
1246                        printEscaped( ch );
1247                }
1248            }
1249        }
1250    
1251    
1252        /**
1253         * Print a document type public or system identifier URL.
1254         * Encapsulates the URL in double quotes, escapes non-printing
1255         * characters and print it equivalent to {@link #printText}.
1256         *
1257         * @param url The document type url to print
1258         */
1259        protected void printDoctypeURL( String url )
1260            throws IOException
1261        {
1262            int                i;
1263    
1264            printer.printText( '"' );
1265            for( i = 0 ; i < url.length() ; ++i ) {
1266                if ( url.charAt( i ) == '"' ||  url.charAt( i ) < 0x20 || url.charAt( i ) > 0x7F ) {
1267                    printer.printText( '%' );
1268                    printer.printText( Integer.toHexString( url.charAt( i ) ) );
1269                } else
1270                    printer.printText( url.charAt( i ) );
1271            }
1272            printer.printText( '"' );
1273        }
1274    
1275    
1276        protected void printEscaped( int ch )
1277            throws IOException
1278        {
1279            String charRef;
1280    
1281            // If there is a suitable entity reference for this
1282            // character, print it. The list of available entity
1283            // references is almost but not identical between
1284            // XML and HTML.
1285            charRef = getEntityRef( ch );
1286            if ( charRef != null ) {
1287                printer.printText( '&' );
1288                printer.printText( charRef );
1289                printer.printText( ';' );
1290            } else if ( ( ch >= ' ' && encodingInfo.isPrintable(ch) && ch != 0xF7 ) ||
1291                        ch == '\n' || ch == '\r' || ch == '\t' ) {
1292                // If the character is not printable, print as character reference.
1293                // Non printables are below ASCII space but not tab or line
1294                // terminator, ASCII delete, or above a certain Unicode threshold.
1295                if (ch < 0x10000) {
1296                    printer.printText((char)ch );
1297                } else {
1298                    printer.printText((char)(((ch-0x10000)>>10)+0xd800));
1299                    printer.printText((char)(((ch-0x10000)&0x3ff)+0xdc00));
1300                }
1301    
1302            } else {
1303                printer.printText( "&#x" );
1304                printer.printText(Integer.toHexString(ch));
1305                printer.printText( ';' );
1306            }
1307        }
1308    
1309    
1310        /**
1311         * Escapes a string so it may be printed as text content or attribute
1312         * value. Non printable characters are escaped using character references.
1313         * Where the format specifies a deault entity reference, that reference
1314         * is used (e.g. <tt>&amp;lt;</tt>).
1315         *
1316         * @param source The string to escape
1317         */
1318        protected void printEscaped( String source )
1319            throws IOException
1320        {
1321            for ( int i = 0 ; i < source.length() ; ++i ) {
1322                int ch = source.charAt(i);
1323                if ((ch & 0xfc00) == 0xd800 && i+1 < source.length()) {
1324                    int lowch = source.charAt(i+1);
1325                    if ((lowch & 0xfc00) == 0xdc00) {
1326                        ch = 0x10000 + ((ch-0xd800)<<10) + lowch-0xdc00;
1327                        i++;
1328                    }
1329                }
1330                printEscaped(ch);
1331            }
1332        }
1333    
1334    
1335        //--------------------------------//
1336        // Element state handling methods //
1337        //--------------------------------//
1338    
1339    
1340        /**
1341         * Return the state of the current element.
1342         *
1343         * @return Current element state
1344         */
1345        protected ElementState getElementState()
1346        {
1347            return elementStates[ elementStateCount ];
1348        }
1349    
1350    
1351        /**
1352         * Enter a new element state for the specified element.
1353         * Tag name and space preserving is specified, element
1354         * state is initially empty.
1355         *
1356         * @return Current element state, or null
1357         */
1358        protected ElementState enterElementState( String namespaceURI, String localName,
1359                                                  String rawName, boolean preserveSpace )
1360        {
1361            ElementState state;
1362    
1363            if ( elementStateCount + 1 == elementStates.length ) {
1364                ElementState[] newStates;
1365    
1366                // Need to create a larger array of states. This does not happen
1367                // often, unless the document is really deep.
1368                newStates = new ElementState[ elementStates.length + 10 ];
1369                for ( int i = 0 ; i < elementStates.length ; ++i )
1370                    newStates[ i ] = elementStates[ i ];
1371                for ( int i = elementStates.length ; i < newStates.length ; ++i )
1372                    newStates[ i ] = new ElementState();
1373                elementStates = newStates;
1374            }
1375    
1376            ++elementStateCount;
1377            state = elementStates[ elementStateCount ];
1378            state.namespaceURI = namespaceURI;
1379            state.localName = localName;
1380            state.rawName = rawName;
1381            state.preserveSpace = preserveSpace;
1382            state.empty = true;
1383            state.afterElement = false;
1384            state.afterComment = false;
1385            state.doCData = state.inCData = false;
1386            state.unescaped = false;
1387            state.prefixes = prefixes;
1388    
1389            prefixes = null;
1390            return state;
1391        }
1392    
1393    
1394        /**
1395         * Leave the current element state and return to the
1396         * state of the parent element. If this was the root
1397         * element, return to the state of the document.
1398         *
1399         * @return Previous element state
1400         */
1401        protected ElementState leaveElementState()
1402        {
1403            if ( elementStateCount > 0 ) {
1404                /*Corrected by David Blondeau (blondeau@intalio.com)*/
1405            prefixes = null;
1406            //_prefixes = _elementStates[ _elementStateCount ].prefixes;
1407                -- elementStateCount;
1408                return elementStates[ elementStateCount ];
1409            } else
1410                throw new IllegalStateException( "Internal error: element state is zero" );
1411        }
1412    
1413    
1414        /**
1415         * Returns true if in the state of the document.
1416         * Returns true before entering any element and after
1417         * leaving the root element.
1418         *
1419         * @return True if in the state of the document
1420         */
1421        protected boolean isDocumentState()
1422        {
1423            return elementStateCount == 0;
1424        }
1425    
1426    
1427        /**
1428         * Returns the namespace prefix for the specified URI.
1429         * If the URI has been mapped to a prefix, returns the
1430         * prefix, otherwise returns null.
1431         *
1432         * @param namespaceURI The namespace URI
1433         * @return The namespace prefix if known, or null
1434         */
1435        protected String getPrefix( String namespaceURI )
1436        {
1437            String    prefix;
1438    
1439            if ( prefixes != null ) {
1440                prefix = (String) prefixes.get( namespaceURI );
1441                if ( prefix != null )
1442                    return prefix;
1443            }
1444            if ( elementStateCount == 0 )
1445                return null;
1446            else {
1447                for ( int i = elementStateCount ; i > 0 ; --i ) {
1448                    if ( elementStates[ i ].prefixes != null ) {
1449                        prefix = (String) elementStates[ i ].prefixes.get( namespaceURI );
1450                        if ( prefix != null )
1451                            return prefix;
1452                    }
1453                }
1454            }
1455            return null;
1456        }
1457    
1458    
1459    }