001    /**
002     *
003     * Copyright 2006 The Apache Software Foundation
004     *
005     *  Licensed under the Apache License, Version 2.0 (the "License");
006     *  you may not use this file except in compliance with the License.
007     *  You may obtain a copy of the License at
008     *
009     *     http://www.apache.org/licenses/LICENSE-2.0
010     *
011     *  Unless required by applicable law or agreed to in writing, software
012     *  distributed under the License is distributed on an "AS IS" BASIS,
013     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     *  See the License for the specific language governing permissions and
015     *  limitations under the License.
016     */
017    
018    /*
019     * This code has been borrowed from the Apache Xerces project. We're copying the code to
020     * keep from adding a dependency on Xerces in the Geronimo kernel.
021     */
022    
023    package org.apache.geronimo.system.configuration;
024    
025    import java.io.IOException;
026    import java.io.OutputStream;
027    import java.io.Writer;
028    import java.util.Enumeration;
029    
030    import org.w3c.dom.Attr;
031    import org.w3c.dom.Element;
032    import org.w3c.dom.NamedNodeMap;
033    import org.w3c.dom.Node;
034    import org.xml.sax.AttributeList;
035    import org.xml.sax.Attributes;
036    import org.xml.sax.SAXException;
037    import org.xml.sax.helpers.AttributesImpl;
038    
039    
040    /**
041     * Implements an XML serializer supporting both DOM and SAX pretty
042     * serializing. For usage instructions see {@link Serializer}.
043     * <p>
044     * If an output stream is used, the encoding is taken from the
045     * output format (defaults to <tt>UTF-8</tt>). If a writer is
046     * used, make sure the writer uses the same encoding (if applies)
047     * as specified in the output format.
048     * <p>
049     * The serializer supports both DOM and SAX. DOM serializing is done
050     * by calling {@link #serialize} and SAX serializing is done by firing
051     * SAX events and using the serializer as a document handler.
052     * <p>
 * If an I/O exception occurs while serializing, the serializer
 * will not throw an exception directly, but only throw it
 * at the end of serializing (either DOM or SAX's {@link
 * org.xml.sax.DocumentHandler#endDocument}).
057     * <p>
058     * For elements that are not specified as whitespace preserving,
059     * the serializer will potentially break long text lines at space
060     * boundaries, indent lines, and serialize elements on separate
061     * lines. Line terminators will be regarded as spaces, and
062     * spaces at beginning of line will be stripped.
063     *
064     *
065     * @version $Revision: 410741 $ $Date: 2006-05-31 21:35:48 -0700 (Wed, 31 May 2006) $
066     * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
067     * @see Serializer
068     */
069    public class XMLSerializer extends BaseMarkupSerializer
070    {
071    
072        /**
073         * Constructs a new serializer. The serializer cannot be used without
074         * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
075         * first.
076         */
077        public XMLSerializer()
078        {
079            super( new OutputFormat( Method.XML, null, false ) );
080        }
081    
082    
083        /**
084         * Constructs a new serializer. The serializer cannot be used without
085         * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
086         * first.
087         */
088        public XMLSerializer( OutputFormat format )
089        {
090            super( format != null ? format : new OutputFormat( Method.XML, null, false ) );
091            this.format.setMethod( Method.XML );
092        }
093    
094    
095        /**
096         * Constructs a new serializer that writes to the specified writer
097         * using the specified output format. If <tt>format</tt> is null,
098         * will use a default output format.
099         *
100         * @param writer The writer to use
101         * @param format The output format to use, null for the default
102         */
103        public XMLSerializer( Writer writer, OutputFormat format )
104        {
105            super( format != null ? format : new OutputFormat( Method.XML, null, false ) );
106            this.format.setMethod( Method.XML );
107            setOutputCharStream( writer );
108        }
109    
110    
111        /**
112         * Constructs a new serializer that writes to the specified output
113         * stream using the specified output format. If <tt>format</tt>
114         * is null, will use a default output format.
115         *
116         * @param output The output stream to use
117         * @param format The output format to use, null for the default
118         */
119        public XMLSerializer( OutputStream output, OutputFormat format )
120        {
121            super( format != null ? format : new OutputFormat( Method.XML, null, false ) );
122            this.format.setMethod( Method.XML );
123            setOutputByteStream( output );
124        }
125    
126    
127        public void setOutputFormat( OutputFormat format )
128        {
129            super.setOutputFormat( format != null ? format : new OutputFormat( Method.XML, null, false ) );
130        }
131    
132    
133        //-----------------------------------------//
134        // SAX content handler serializing methods //
135        //-----------------------------------------//
136    
137    
138        public void startElement( String namespaceURI, String localName,
139                                  String rawName, Attributes attrs )
140            throws SAXException
141        {
142            int          i;
143            boolean      preserveSpace;
144            ElementState state;
145            String       name;
146            String       value;
147    
148            try {
149            if ( printer == null )
150                throw new IllegalStateException( "SER002 No writer supplied for serializer" );
151    
152            state = getElementState();
153            if ( isDocumentState() ) {
154                // If this is the root element handle it differently.
155                // If the first root element in the document, serialize
156                // the document's DOCTYPE. Space preserving defaults
157                // to that of the output format.
158                if ( ! started )
159                        startDocument( ( localName == null || localName.length() == 0 ) ? rawName : localName );
160            } else {
161                // For any other element, if first in parent, then
162                // close parent's opening tag and use the parnet's
163                // space preserving.
164                if ( state.empty )
165                    printer.printText( '>' );
166                // Must leave CData section first
167                if ( state.inCData )
168                {
169                    printer.printText( "]]>" );
170                    state.inCData = false;
171                }
172                // Indent this element on a new line if the first
173                // content of the parent element or immediately
174                // following an element or a comment
175                if ( indenting && ! state.preserveSpace &&
176                     ( state.empty || state.afterElement || state.afterComment) )
177                    printer.breakLine();
178            }
179            preserveSpace = state.preserveSpace;
180    
181                //We remove the namespaces from the attributes list so that they will
182                //be in _prefixes
183                attrs = extractNamespaces(attrs);
184    
185            // Do not change the current element state yet.
186            // This only happens in endElement().
187                if ( rawName == null || rawName.length() == 0 ) {
188                    if ( localName == null )
189                        throw new SAXException( "No rawName and localName is null" );
190                    if ( namespaceURI != null && ! namespaceURI.equals( "" ) ) {
191                    String prefix;
192                    prefix = getPrefix( namespaceURI );
193                        if ( prefix != null && prefix.length() > 0 )
194                        rawName = prefix + ":" + localName;
195                        else
196                            rawName = localName;
197                    } else
198                        rawName = localName;
199            }
200    
201            printer.printText( '<' );
202            printer.printText( rawName );
203            printer.indent();
204    
205            // For each attribute print it's name and value as one part,
206            // separated with a space so the element can be broken on
207            // multiple lines.
208            if ( attrs != null ) {
209                for ( i = 0 ; i < attrs.getLength() ; ++i ) {
210                    printer.printSpace();
211    
212                    name = attrs.getQName( i );
213                        if ( name != null && name.length() == 0 ) {
214                        String prefix;
215                        String attrURI;
216    
217                        name = attrs.getLocalName( i );
218                        attrURI = attrs.getURI( i );
219                            if ( ( attrURI != null && attrURI.length() != 0 ) &&
220                                 ( namespaceURI == null || namespaceURI.length() == 0 ||
221                                                  ! attrURI.equals( namespaceURI ) ) ) {
222                            prefix = getPrefix( attrURI );
223                            if ( prefix != null && prefix.length() > 0 )
224                                name = prefix + ":" + name;
225                        }
226                    }
227    
228                    value = attrs.getValue( i );
229                    if ( value == null )
230                        value = "";
231                    printer.printText( name );
232                    printer.printText( "=\"" );
233                    printEscaped( value );
234                    printer.printText( '"' );
235    
236                    // If the attribute xml:space exists, determine whether
237                    // to preserve spaces in this and child nodes based on
238                    // its value.
239                    if ( name.equals( "xml:space" ) ) {
240                        if ( value.equals( "preserve" ) )
241                            preserveSpace = true;
242                        else
243                            preserveSpace = format.getPreserveSpace();
244                    }
245                }
246            }
247    
248                if ( prefixes != null ) {
249                Enumeration keyEnum; 
250    
251                keyEnum = prefixes.keys();
252                while ( keyEnum.hasMoreElements() ) {
253                    printer.printSpace();
254                    value = (String) keyEnum.nextElement();
255                    name = (String) prefixes.get( value );
256                    if ( name.length() == 0 ) {
257                        printer.printText( "xmlns=\"" );
258                        printEscaped( value );
259                        printer.printText( '"' );
260                    } else {
261                        printer.printText( "xmlns:" );
262                        printer.printText( name );
263                        printer.printText( "=\"" );
264                        printEscaped( value );
265                        printer.printText( '"' );
266                    }
267                }
268            }
269    
270            // Now it's time to enter a new element state
271            // with the tag name and space preserving.
272            // We still do not change the curent element state.
273            state = enterElementState( namespaceURI, localName, rawName, preserveSpace );
274                name = ( localName == null || localName.length() == 0 ) ? rawName : namespaceURI + "^" + localName;
275                state.doCData = format.isCDataElement( name );
276                state.unescaped = format.isNonEscapingElement( name );
277            } catch ( IOException except ) {
278                throw new SAXException( except );
279            }
280        }
281    
282    
283        public void endElement( String namespaceURI, String localName,
284                                String rawName )
285            throws SAXException
286        {
287            try {
288                endElementIO( namespaceURI, localName, rawName );
289            } catch ( IOException except ) {
290                throw new SAXException( except );
291            }
292        }
293    
294    
295        public void endElementIO( String namespaceURI, String localName,
296                                String rawName )
297            throws IOException
298        {
299            ElementState state;
300    
301            // Works much like content() with additions for closing
302            // an element. Note the different checks for the closed
303            // element's state and the parent element's state.
304            printer.unindent();
305            state = getElementState();
306            if ( state.empty ) {
307                printer.printText( "/>" );
308            } else {
309                // Must leave CData section first
310                if ( state.inCData )
311                    printer.printText( "]]>" );
312                // This element is not empty and that last content was
313                // another element, so print a line break before that
314                // last element and this element's closing tag.
315                if ( indenting && ! state.preserveSpace && (state.afterElement || state.afterComment) )
316                    printer.breakLine();
317                printer.printText( "</" );
318                printer.printText( state.rawName );
319                printer.printText( '>' );
320            }
321            // Leave the element state and update that of the parent
322            // (if we're not root) to not empty and after element.
323            state = leaveElementState();
324            state.afterElement = true;
325            state.afterComment = false;
326            state.empty = false;
327            if ( isDocumentState() )
328                printer.flush();
329        }
330    
331    
332        //------------------------------------------//
333        // SAX document handler serializing methods //
334        //------------------------------------------//
335    
336    
337        public void startElement( String tagName, AttributeList attrs )
338            throws SAXException
339        {
340            int          i;
341            boolean      preserveSpace;
342            ElementState state;
343            String       name;
344            String       value;
345    
346            try {
347            if ( printer == null )
348                throw new IllegalStateException( "SER002 No writer supplied for serializer" );
349    
350            state = getElementState();
351            if ( isDocumentState() ) {
352                // If this is the root element handle it differently.
353                // If the first root element in the document, serialize
354                // the document's DOCTYPE. Space preserving defaults
355                // to that of the output format.
356                if ( ! started )
357                    startDocument( tagName );
358            } else {
359                // For any other element, if first in parent, then
360                // close parent's opening tag and use the parnet's
361                // space preserving.
362                if ( state.empty )
363                    printer.printText( '>' );
364                // Must leave CData section first
365                if ( state.inCData )
366                {
367                    printer.printText( "]]>" );
368                    state.inCData = false;
369                }
370                // Indent this element on a new line if the first
371                // content of the parent element or immediately
372                // following an element.
373                if ( indenting && ! state.preserveSpace &&
374                     ( state.empty || state.afterElement || state.afterComment) )
375                    printer.breakLine();
376            }
377            preserveSpace = state.preserveSpace;
378    
379            // Do not change the current element state yet.
380            // This only happens in endElement().
381    
382            printer.printText( '<' );
383            printer.printText( tagName );
384            printer.indent();
385    
386            // For each attribute print it's name and value as one part,
387            // separated with a space so the element can be broken on
388            // multiple lines.
389            if ( attrs != null ) {
390                for ( i = 0 ; i < attrs.getLength() ; ++i ) {
391                    printer.printSpace();
392                    name = attrs.getName( i );
393                    value = attrs.getValue( i );
394                    if ( value != null ) {
395                        printer.printText( name );
396                        printer.printText( "=\"" );
397                        printEscaped( value );
398                        printer.printText( '"' );
399                    }
400    
401                    // If the attribute xml:space exists, determine whether
402                    // to preserve spaces in this and child nodes based on
403                    // its value.
404                    if ( name.equals( "xml:space" ) ) {
405                        if ( value.equals( "preserve" ) )
406                            preserveSpace = true;
407                        else
408                            preserveSpace = format.getPreserveSpace();
409                    }
410                }
411            }
412            // Now it's time to enter a new element state
413            // with the tag name and space preserving.
414            // We still do not change the curent element state.
415            state = enterElementState( null, null, tagName, preserveSpace );
416            state.doCData = format.isCDataElement( tagName );
417            state.unescaped = format.isNonEscapingElement( tagName );
418            } catch ( IOException except ) {
419                throw new SAXException( except );
420        }
421    
422        }
423    
424    
425        public void endElement( String tagName )
426            throws SAXException
427        {
428            endElement( null, null, tagName );
429        }
430    
431    
432    
433        //------------------------------------------//
    // Generic node serializing methods         //
435        //------------------------------------------//
436    
437    
438        /**
439         * Called to serialize the document's DOCTYPE by the root element.
440         * The document type declaration must name the root element,
441         * but the root element is only known when that element is serialized,
442         * and not at the start of the document.
443         * <p>
444         * This method will check if it has not been called before ({@link #started}),
445         * will serialize the document type declaration, and will serialize all
446         * pre-root comments and PIs that were accumulated in the document
447         * (see {@link #serializePreRoot}). Pre-root will be serialized even if
448         * this is not the first root element of the document.
449         */
450        protected void startDocument( String rootTagName )
451            throws IOException
452        {
453            int    i;
454            String dtd;
455    
456            dtd = printer.leaveDTD();
457            if ( ! started ) {
458    
459                if ( ! format.getOmitXMLDeclaration() ) {
460                    StringBuffer    buffer;
461    
462                    // Serialize the document declaration appreaing at the head
463                    // of very XML document (unless asked not to).
464                    buffer = new StringBuffer( "<?xml version=\"" );
465                    if ( format.getVersion() != null )
466                        buffer.append( format.getVersion() );
467                    else
468                        buffer.append( "1.0" );
469                    buffer.append( '"' );
470                    if ( format.getEncoding() != null ) {
471                        buffer.append( " encoding=\"" );
472                        buffer.append( format.getEncoding() );
473                        buffer.append( '"' );
474                    }
475                    if ( format.getStandalone() && docTypeSystemId == null &&
476                         docTypePublicId == null )
477                        buffer.append( " standalone=\"yes\"" );
478                    buffer.append( "?>" );
479                    printer.printText( buffer );
480                    printer.breakLine();
481                }
482    
483                if ( ! format.getOmitDocumentType() ) {
484                    if ( docTypeSystemId != null ) {
485                        // System identifier must be specified to print DOCTYPE.
486                        // If public identifier is specified print 'PUBLIC
487                        // <public> <system>', if not, print 'SYSTEM <system>'.
488                        printer.printText( "<!DOCTYPE " );
489                        printer.printText( rootTagName );
490                        if ( docTypePublicId != null ) {
491                            printer.printText( " PUBLIC " );
492                            printDoctypeURL( docTypePublicId );
493                            if ( indenting ) {
494                                printer.breakLine();
495                                for ( i = 0 ; i < 18 + rootTagName.length() ; ++i )
496                                    printer.printText( " " );
497                            } else
498                                printer.printText( " " );
499                        printDoctypeURL( docTypeSystemId );
500                        }
501                        else {
502                            printer.printText( " SYSTEM " );
503                            printDoctypeURL( docTypeSystemId );
504                        }
505    
506                        // If we accumulated any DTD contents while printing.
507                        // this would be the place to print it.
508                        if ( dtd != null && dtd.length() > 0 ) {
509                            printer.printText( " [" );
510                            printText( dtd, true, true );
511                            printer.printText( ']' );
512                        }
513    
514                        printer.printText( ">" );
515                        printer.breakLine();
516                    } else if ( dtd != null && dtd.length() > 0 ) {
517                        printer.printText( "<!DOCTYPE " );
518                        printer.printText( rootTagName );
519                        printer.printText( " [" );
520                        printText( dtd, true, true );
521                        printer.printText( "]>" );
522                        printer.breakLine();
523                    }
524                }
525            }
526            started = true;
527            // Always serialize these, even if not te first root element.
528            serializePreRoot();
529        }
530    
531    
532        /**
533         * Called to serialize a DOM element. Equivalent to calling {@link
534         * #startElement}, {@link #endElement} and serializing everything
535         * inbetween, but better optimized.
536         */
537        protected void serializeElement( Element elem )
538            throws IOException
539        {
540            Attr         attr;
541            NamedNodeMap attrMap;
542            int          i;
543            Node         child;
544            ElementState state;
545            boolean      preserveSpace;
546            String       name;
547            String       value;
548            String       tagName;
549    
550            tagName = elem.getTagName();
551            state = getElementState();
552            if ( isDocumentState() ) {
553                // If this is the root element handle it differently.
554                // If the first root element in the document, serialize
555                // the document's DOCTYPE. Space preserving defaults
556                // to that of the output format.
557                if ( ! started )
558                    startDocument( tagName );
559            } else {
560                // For any other element, if first in parent, then
561                // close parent's opening tag and use the parnet's
562                // space preserving.
563                if ( state.empty )
564                    printer.printText( '>' );
565                // Must leave CData section first
566                if ( state.inCData )
567                {
568                    printer.printText( "]]>" );
569                    state.inCData = false;
570                }
571                // Indent this element on a new line if the first
572                // content of the parent element or immediately
573                // following an element.
574                if ( indenting && ! state.preserveSpace &&
575                     ( state.empty || state.afterElement || state.afterComment) )
576                    printer.breakLine();
577            }
578            preserveSpace = state.preserveSpace;
579    
580            // Do not change the current element state yet.
581            // This only happens in endElement().
582    
583            printer.printText( '<' );
584            printer.printText( tagName );
585            printer.indent();
586    
587            // Lookup the element's attribute, but only print specified
588            // attributes. (Unspecified attributes are derived from the DTD.
589            // For each attribute print it's name and value as one part,
590            // separated with a space so the element can be broken on
591            // multiple lines.
592            attrMap = elem.getAttributes();
593            if ( attrMap != null ) {
594                for ( i = 0 ; i < attrMap.getLength() ; ++i ) {
595                    attr = (Attr) attrMap.item( i );
596                    name = attr.getName();
597                    value = attr.getValue();
598                    if ( value == null )
599                        value = "";
600                    if ( attr.getSpecified() ) {
601                        printer.printSpace();
602                        printer.printText( name );
603                        printer.printText( "=\"" );
604                        printEscaped( value );
605                        printer.printText( '"' );
606                    }
607                    // If the attribute xml:space exists, determine whether
608                    // to preserve spaces in this and child nodes based on
609                    // its value.
610                    if ( name.equals( "xml:space" ) ) {
611                        if ( value.equals( "preserve" ) )
612                            preserveSpace = true;
613                        else
614                            preserveSpace = format.getPreserveSpace();
615                    }
616                }
617            }
618    
619            // If element has children, then serialize them, otherwise
620            // serialize en empty tag.
621            if ( elem.hasChildNodes() ) {
622                // Enter an element state, and serialize the children
623                // one by one. Finally, end the element.
624                state = enterElementState( null, null, tagName, preserveSpace );
625                state.doCData = format.isCDataElement( tagName );
626                state.unescaped = format.isNonEscapingElement( tagName );
627                child = elem.getFirstChild();
628                while ( child != null ) {
629                    serializeNode( child );
630                    child = child.getNextSibling();
631                }
632                endElementIO( null, null, tagName );
633            } else {
634                printer.unindent();
635                printer.printText( "/>" );
636                // After element but parent element is no longer empty.
637                state.afterElement = true;
638                state.afterComment = false;
639                state.empty = false;
640                if ( isDocumentState() )
641                    printer.flush();
642            }
643        }
644    
645    
646        protected String getEntityRef( int ch )
647        {
648            // Encode special XML characters into the equivalent character references.
649            // These five are defined by default for all XML documents.
650            switch ( ch ) {
651            case '<':
652                return "lt";
653            case '>':
654                return "gt";
655            case '"':
656                return "quot";
657            case '\'':
658                return "apos";
659            case '&':
660                return "amp";
661            }
662            return null;
663        }
664    
665    
666        /** Retrieve and remove the namespaces declarations from the list of attributes.
667         *
668         */
669        private Attributes extractNamespaces( Attributes attrs )
670            throws SAXException
671        {
672            AttributesImpl attrsOnly;
673            String         rawName;
674            int            i;
675            int            length;
676    
677            length = attrs.getLength();
678            attrsOnly = new AttributesImpl( attrs );
679    
680            for ( i = length - 1 ; i >= 0 ; --i ) {
681                rawName = attrsOnly.getQName( i );
682    
683                //We have to exclude the namespaces declarations from the attributes
684                //Append only when the feature http://xml.org/sax/features/namespace-prefixes"
685                //is TRUE
686                if ( rawName.startsWith( "xmlns" ) ) {
687                    if (rawName.length() == 5) {
688                        startPrefixMapping( "", attrs.getValue( i ) );
689                        attrsOnly.removeAttribute( i );
690                    } else if (rawName.charAt(5) == ':') {
691                        startPrefixMapping(rawName.substring(6), attrs.getValue(i));
692                        attrsOnly.removeAttribute( i );
693                    }
694                }
695            }
696            return attrsOnly;
697        }
698    }