View Javadoc

1   /**
2    *
3    * Copyright 2006 The Apache Software Foundation
4    *
5    *  Licensed under the Apache License, Version 2.0 (the "License");
6    *  you may not use this file except in compliance with the License.
7    *  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   *  Unless required by applicable law or agreed to in writing, software
12   *  distributed under the License is distributed on an "AS IS" BASIS,
13   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   *  See the License for the specific language governing permissions and
15   *  limitations under the License.
16   */
17  
18  /*
19   * This code has been borrowed from the Apache Xerces project. We're copying the code to
20   * keep from adding a dependency on Xerces in the Geronimo kernel.
21   */
22  
23  package org.apache.geronimo.system.configuration;
24  
25  import java.io.IOException;
26  import java.io.OutputStream;
27  import java.io.Writer;
28  import java.util.Enumeration;
29  
30  import org.w3c.dom.Attr;
31  import org.w3c.dom.Element;
32  import org.w3c.dom.NamedNodeMap;
33  import org.w3c.dom.Node;
34  import org.xml.sax.AttributeList;
35  import org.xml.sax.Attributes;
36  import org.xml.sax.SAXException;
37  import org.xml.sax.helpers.AttributesImpl;
38  
39  
40  /**
41   * Implements an XML serializer supporting both DOM and SAX pretty
42   * serializing. For usage instructions see {@link Serializer}.
43   * <p>
44   * If an output stream is used, the encoding is taken from the
45   * output format (defaults to <tt>UTF-8</tt>). If a writer is
46   * used, make sure the writer uses the same encoding (if applies)
47   * as specified in the output format.
48   * <p>
49   * The serializer supports both DOM and SAX. DOM serializing is done
50   * by calling {@link #serialize} and SAX serializing is done by firing
51   * SAX events and using the serializer as a document handler.
52   * <p>
53   * If an I/O exception occurs while serializing, the serializer
54   * will not throw an exception directly, but only throw it
55   * at the end of serializing (either DOM or SAX's {@link
56   * org.xml.sax.DocumentHandler#endDocument}).
57   * <p>
58   * For elements that are not specified as whitespace preserving,
59   * the serializer will potentially break long text lines at space
60   * boundaries, indent lines, and serialize elements on separate
61   * lines. Line terminators will be regarded as spaces, and
62   * spaces at beginning of line will be stripped.
63   *
64   *
65   * @version $Revision: 410741 $ $Date: 2006-05-31 21:35:48 -0700 (Wed, 31 May 2006) $
66   * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
67   * @see Serializer
68   */
69  public class XMLSerializer extends BaseMarkupSerializer
70  {
71  
72      /**
73       * Constructs a new serializer. The serializer cannot be used without
74       * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
75       * first.
76       */
77      public XMLSerializer()
78      {
79          super( new OutputFormat( Method.XML, null, false ) );
80      }
81  
82  
83      /**
84       * Constructs a new serializer. The serializer cannot be used without
85       * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
86       * first.
87       */
88      public XMLSerializer( OutputFormat format )
89      {
90          super( format != null ? format : new OutputFormat( Method.XML, null, false ) );
91          this.format.setMethod( Method.XML );
92      }
93  
94  
95      /**
96       * Constructs a new serializer that writes to the specified writer
97       * using the specified output format. If <tt>format</tt> is null,
98       * will use a default output format.
99       *
100      * @param writer The writer to use
101      * @param format The output format to use, null for the default
102      */
103     public XMLSerializer( Writer writer, OutputFormat format )
104     {
105         super( format != null ? format : new OutputFormat( Method.XML, null, false ) );
106         this.format.setMethod( Method.XML );
107         setOutputCharStream( writer );
108     }
109 
110 
111     /**
112      * Constructs a new serializer that writes to the specified output
113      * stream using the specified output format. If <tt>format</tt>
114      * is null, will use a default output format.
115      *
116      * @param output The output stream to use
117      * @param format The output format to use, null for the default
118      */
119     public XMLSerializer( OutputStream output, OutputFormat format )
120     {
121         super( format != null ? format : new OutputFormat( Method.XML, null, false ) );
122         this.format.setMethod( Method.XML );
123         setOutputByteStream( output );
124     }
125 
126 
127     public void setOutputFormat( OutputFormat format )
128     {
129         super.setOutputFormat( format != null ? format : new OutputFormat( Method.XML, null, false ) );
130     }
131 
132 
133     //-----------------------------------------//
134     // SAX content handler serializing methods //
135     //-----------------------------------------//
136 
137 
138     public void startElement( String namespaceURI, String localName,
139                               String rawName, Attributes attrs )
140         throws SAXException
141     {
142         int          i;
143         boolean      preserveSpace;
144         ElementState state;
145         String       name;
146         String       value;
147 
148         try {
149         if ( printer == null )
150             throw new IllegalStateException( "SER002 No writer supplied for serializer" );
151 
152         state = getElementState();
153         if ( isDocumentState() ) {
154             // If this is the root element handle it differently.
155             // If the first root element in the document, serialize
156             // the document's DOCTYPE. Space preserving defaults
157             // to that of the output format.
158             if ( ! started )
159                     startDocument( ( localName == null || localName.length() == 0 ) ? rawName : localName );
160         } else {
161             // For any other element, if first in parent, then
162             // close parent's opening tag and use the parnet's
163             // space preserving.
164             if ( state.empty )
165                 printer.printText( '>' );
166             // Must leave CData section first
167             if ( state.inCData )
168             {
169                 printer.printText( "]]>" );
170                 state.inCData = false;
171             }
172             // Indent this element on a new line if the first
173             // content of the parent element or immediately
174             // following an element or a comment
175             if ( indenting && ! state.preserveSpace &&
176                  ( state.empty || state.afterElement || state.afterComment) )
177                 printer.breakLine();
178         }
179         preserveSpace = state.preserveSpace;
180 
181             //We remove the namespaces from the attributes list so that they will
182             //be in _prefixes
183             attrs = extractNamespaces(attrs);
184 
185         // Do not change the current element state yet.
186         // This only happens in endElement().
187             if ( rawName == null || rawName.length() == 0 ) {
188                 if ( localName == null )
189                     throw new SAXException( "No rawName and localName is null" );
190                 if ( namespaceURI != null && ! namespaceURI.equals( "" ) ) {
191                 String prefix;
192                 prefix = getPrefix( namespaceURI );
193                     if ( prefix != null && prefix.length() > 0 )
194                     rawName = prefix + ":" + localName;
195                     else
196                         rawName = localName;
197                 } else
198                     rawName = localName;
199         }
200 
201         printer.printText( '<' );
202         printer.printText( rawName );
203         printer.indent();
204 
205         // For each attribute print it's name and value as one part,
206         // separated with a space so the element can be broken on
207         // multiple lines.
208         if ( attrs != null ) {
209             for ( i = 0 ; i < attrs.getLength() ; ++i ) {
210                 printer.printSpace();
211 
212                 name = attrs.getQName( i );
213                     if ( name != null && name.length() == 0 ) {
214                     String prefix;
215                     String attrURI;
216 
217                     name = attrs.getLocalName( i );
218                     attrURI = attrs.getURI( i );
219                         if ( ( attrURI != null && attrURI.length() != 0 ) &&
220                              ( namespaceURI == null || namespaceURI.length() == 0 ||
221                                               ! attrURI.equals( namespaceURI ) ) ) {
222                         prefix = getPrefix( attrURI );
223                         if ( prefix != null && prefix.length() > 0 )
224                             name = prefix + ":" + name;
225                     }
226                 }
227 
228                 value = attrs.getValue( i );
229                 if ( value == null )
230                     value = "";
231                 printer.printText( name );
232                 printer.printText( "=\"" );
233                 printEscaped( value );
234                 printer.printText( '"' );
235 
236                 // If the attribute xml:space exists, determine whether
237                 // to preserve spaces in this and child nodes based on
238                 // its value.
239                 if ( name.equals( "xml:space" ) ) {
240                     if ( value.equals( "preserve" ) )
241                         preserveSpace = true;
242                     else
243                         preserveSpace = format.getPreserveSpace();
244                 }
245             }
246         }
247 
248             if ( prefixes != null ) {
249             Enumeration keyEnum; 
250 
251             keyEnum = prefixes.keys();
252             while ( keyEnum.hasMoreElements() ) {
253                 printer.printSpace();
254                 value = (String) keyEnum.nextElement();
255                 name = (String) prefixes.get( value );
256                 if ( name.length() == 0 ) {
257                     printer.printText( "xmlns=\"" );
258                     printEscaped( value );
259                     printer.printText( '"' );
260                 } else {
261                     printer.printText( "xmlns:" );
262                     printer.printText( name );
263                     printer.printText( "=\"" );
264                     printEscaped( value );
265                     printer.printText( '"' );
266                 }
267             }
268         }
269 
270         // Now it's time to enter a new element state
271         // with the tag name and space preserving.
272         // We still do not change the curent element state.
273         state = enterElementState( namespaceURI, localName, rawName, preserveSpace );
274             name = ( localName == null || localName.length() == 0 ) ? rawName : namespaceURI + "^" + localName;
275             state.doCData = format.isCDataElement( name );
276             state.unescaped = format.isNonEscapingElement( name );
277         } catch ( IOException except ) {
278             throw new SAXException( except );
279         }
280     }
281 
282 
283     public void endElement( String namespaceURI, String localName,
284                             String rawName )
285         throws SAXException
286     {
287         try {
288             endElementIO( namespaceURI, localName, rawName );
289         } catch ( IOException except ) {
290             throw new SAXException( except );
291         }
292     }
293 
294 
295     public void endElementIO( String namespaceURI, String localName,
296                             String rawName )
297         throws IOException
298     {
299         ElementState state;
300 
301         // Works much like content() with additions for closing
302         // an element. Note the different checks for the closed
303         // element's state and the parent element's state.
304         printer.unindent();
305         state = getElementState();
306         if ( state.empty ) {
307             printer.printText( "/>" );
308         } else {
309             // Must leave CData section first
310             if ( state.inCData )
311                 printer.printText( "]]>" );
312             // This element is not empty and that last content was
313             // another element, so print a line break before that
314             // last element and this element's closing tag.
315             if ( indenting && ! state.preserveSpace && (state.afterElement || state.afterComment) )
316                 printer.breakLine();
317             printer.printText( "</" );
318             printer.printText( state.rawName );
319             printer.printText( '>' );
320         }
321         // Leave the element state and update that of the parent
322         // (if we're not root) to not empty and after element.
323         state = leaveElementState();
324         state.afterElement = true;
325         state.afterComment = false;
326         state.empty = false;
327         if ( isDocumentState() )
328             printer.flush();
329     }
330 
331 
332     //------------------------------------------//
333     // SAX document handler serializing methods //
334     //------------------------------------------//
335 
336 
337     public void startElement( String tagName, AttributeList attrs )
338         throws SAXException
339     {
340         int          i;
341         boolean      preserveSpace;
342         ElementState state;
343         String       name;
344         String       value;
345 
346         try {
347         if ( printer == null )
348             throw new IllegalStateException( "SER002 No writer supplied for serializer" );
349 
350         state = getElementState();
351         if ( isDocumentState() ) {
352             // If this is the root element handle it differently.
353             // If the first root element in the document, serialize
354             // the document's DOCTYPE. Space preserving defaults
355             // to that of the output format.
356             if ( ! started )
357                 startDocument( tagName );
358         } else {
359             // For any other element, if first in parent, then
360             // close parent's opening tag and use the parnet's
361             // space preserving.
362             if ( state.empty )
363                 printer.printText( '>' );
364             // Must leave CData section first
365             if ( state.inCData )
366             {
367                 printer.printText( "]]>" );
368                 state.inCData = false;
369             }
370             // Indent this element on a new line if the first
371             // content of the parent element or immediately
372             // following an element.
373             if ( indenting && ! state.preserveSpace &&
374                  ( state.empty || state.afterElement || state.afterComment) )
375                 printer.breakLine();
376         }
377         preserveSpace = state.preserveSpace;
378 
379         // Do not change the current element state yet.
380         // This only happens in endElement().
381 
382         printer.printText( '<' );
383         printer.printText( tagName );
384         printer.indent();
385 
386         // For each attribute print it's name and value as one part,
387         // separated with a space so the element can be broken on
388         // multiple lines.
389         if ( attrs != null ) {
390             for ( i = 0 ; i < attrs.getLength() ; ++i ) {
391                 printer.printSpace();
392                 name = attrs.getName( i );
393                 value = attrs.getValue( i );
394                 if ( value != null ) {
395                     printer.printText( name );
396                     printer.printText( "=\"" );
397                     printEscaped( value );
398                     printer.printText( '"' );
399                 }
400 
401                 // If the attribute xml:space exists, determine whether
402                 // to preserve spaces in this and child nodes based on
403                 // its value.
404                 if ( name.equals( "xml:space" ) ) {
405                     if ( value.equals( "preserve" ) )
406                         preserveSpace = true;
407                     else
408                         preserveSpace = format.getPreserveSpace();
409                 }
410             }
411         }
412         // Now it's time to enter a new element state
413         // with the tag name and space preserving.
414         // We still do not change the curent element state.
415         state = enterElementState( null, null, tagName, preserveSpace );
416         state.doCData = format.isCDataElement( tagName );
417         state.unescaped = format.isNonEscapingElement( tagName );
418         } catch ( IOException except ) {
419             throw new SAXException( except );
420     }
421 
422     }
423 
424 
425     public void endElement( String tagName )
426         throws SAXException
427     {
428         endElement( null, null, tagName );
429     }
430 
431 
432 
433     //----------------------------------//
434     // Generic node serializing methods //
435     //----------------------------------//
436 
437 
438     /**
439      * Called to serialize the document's DOCTYPE by the root element.
440      * The document type declaration must name the root element,
441      * but the root element is only known when that element is serialized,
442      * and not at the start of the document.
443      * <p>
444      * This method will check if it has not been called before ({@link #started}),
445      * will serialize the document type declaration, and will serialize all
446      * pre-root comments and PIs that were accumulated in the document
447      * (see {@link #serializePreRoot}). Pre-root will be serialized even if
448      * this is not the first root element of the document.
449      */
450     protected void startDocument( String rootTagName )
451         throws IOException
452     {
453         int    i;
454         String dtd;
455 
456         dtd = printer.leaveDTD();
457         if ( ! started ) {
458 
459             if ( ! format.getOmitXMLDeclaration() ) {
460                 StringBuffer    buffer;
461 
462                 // Serialize the document declaration appreaing at the head
463                 // of very XML document (unless asked not to).
464                 buffer = new StringBuffer( "<?xml version=\"" );
465                 if ( format.getVersion() != null )
466                     buffer.append( format.getVersion() );
467                 else
468                     buffer.append( "1.0" );
469                 buffer.append( '"' );
470                 if ( format.getEncoding() != null ) {
471                     buffer.append( " encoding=\"" );
472                     buffer.append( format.getEncoding() );
473                     buffer.append( '"' );
474                 }
475                 if ( format.getStandalone() && docTypeSystemId == null &&
476                      docTypePublicId == null )
477                     buffer.append( " standalone=\"yes\"" );
478                 buffer.append( "?>" );
479                 printer.printText( buffer );
480                 printer.breakLine();
481             }
482 
483             if ( ! format.getOmitDocumentType() ) {
484                 if ( docTypeSystemId != null ) {
485                     // System identifier must be specified to print DOCTYPE.
486                     // If public identifier is specified print 'PUBLIC
487                     // <public> <system>', if not, print 'SYSTEM <system>'.
488                     printer.printText( "<!DOCTYPE " );
489                     printer.printText( rootTagName );
490                     if ( docTypePublicId != null ) {
491                         printer.printText( " PUBLIC " );
492                         printDoctypeURL( docTypePublicId );
493                         if ( indenting ) {
494                             printer.breakLine();
495                             for ( i = 0 ; i < 18 + rootTagName.length() ; ++i )
496                                 printer.printText( " " );
497                         } else
498                             printer.printText( " " );
499                     printDoctypeURL( docTypeSystemId );
500                     }
501                     else {
502                         printer.printText( " SYSTEM " );
503                         printDoctypeURL( docTypeSystemId );
504                     }
505 
506                     // If we accumulated any DTD contents while printing.
507                     // this would be the place to print it.
508                     if ( dtd != null && dtd.length() > 0 ) {
509                         printer.printText( " [" );
510                         printText( dtd, true, true );
511                         printer.printText( ']' );
512                     }
513 
514                     printer.printText( ">" );
515                     printer.breakLine();
516                 } else if ( dtd != null && dtd.length() > 0 ) {
517                     printer.printText( "<!DOCTYPE " );
518                     printer.printText( rootTagName );
519                     printer.printText( " [" );
520                     printText( dtd, true, true );
521                     printer.printText( "]>" );
522                     printer.breakLine();
523                 }
524             }
525         }
526         started = true;
527         // Always serialize these, even if not te first root element.
528         serializePreRoot();
529     }
530 
531 
532     /**
533      * Called to serialize a DOM element. Equivalent to calling {@link
534      * #startElement}, {@link #endElement} and serializing everything
535      * inbetween, but better optimized.
536      */
537     protected void serializeElement( Element elem )
538         throws IOException
539     {
540         Attr         attr;
541         NamedNodeMap attrMap;
542         int          i;
543         Node         child;
544         ElementState state;
545         boolean      preserveSpace;
546         String       name;
547         String       value;
548         String       tagName;
549 
550         tagName = elem.getTagName();
551         state = getElementState();
552         if ( isDocumentState() ) {
553             // If this is the root element handle it differently.
554             // If the first root element in the document, serialize
555             // the document's DOCTYPE. Space preserving defaults
556             // to that of the output format.
557             if ( ! started )
558                 startDocument( tagName );
559         } else {
560             // For any other element, if first in parent, then
561             // close parent's opening tag and use the parnet's
562             // space preserving.
563             if ( state.empty )
564                 printer.printText( '>' );
565             // Must leave CData section first
566             if ( state.inCData )
567             {
568                 printer.printText( "]]>" );
569                 state.inCData = false;
570             }
571             // Indent this element on a new line if the first
572             // content of the parent element or immediately
573             // following an element.
574             if ( indenting && ! state.preserveSpace &&
575                  ( state.empty || state.afterElement || state.afterComment) )
576                 printer.breakLine();
577         }
578         preserveSpace = state.preserveSpace;
579 
580         // Do not change the current element state yet.
581         // This only happens in endElement().
582 
583         printer.printText( '<' );
584         printer.printText( tagName );
585         printer.indent();
586 
587         // Lookup the element's attribute, but only print specified
588         // attributes. (Unspecified attributes are derived from the DTD.
589         // For each attribute print it's name and value as one part,
590         // separated with a space so the element can be broken on
591         // multiple lines.
592         attrMap = elem.getAttributes();
593         if ( attrMap != null ) {
594             for ( i = 0 ; i < attrMap.getLength() ; ++i ) {
595                 attr = (Attr) attrMap.item( i );
596                 name = attr.getName();
597                 value = attr.getValue();
598                 if ( value == null )
599                     value = "";
600                 if ( attr.getSpecified() ) {
601                     printer.printSpace();
602                     printer.printText( name );
603                     printer.printText( "=\"" );
604                     printEscaped( value );
605                     printer.printText( '"' );
606                 }
607                 // If the attribute xml:space exists, determine whether
608                 // to preserve spaces in this and child nodes based on
609                 // its value.
610                 if ( name.equals( "xml:space" ) ) {
611                     if ( value.equals( "preserve" ) )
612                         preserveSpace = true;
613                     else
614                         preserveSpace = format.getPreserveSpace();
615                 }
616             }
617         }
618 
619         // If element has children, then serialize them, otherwise
620         // serialize en empty tag.
621         if ( elem.hasChildNodes() ) {
622             // Enter an element state, and serialize the children
623             // one by one. Finally, end the element.
624             state = enterElementState( null, null, tagName, preserveSpace );
625             state.doCData = format.isCDataElement( tagName );
626             state.unescaped = format.isNonEscapingElement( tagName );
627             child = elem.getFirstChild();
628             while ( child != null ) {
629                 serializeNode( child );
630                 child = child.getNextSibling();
631             }
632             endElementIO( null, null, tagName );
633         } else {
634             printer.unindent();
635             printer.printText( "/>" );
636             // After element but parent element is no longer empty.
637             state.afterElement = true;
638             state.afterComment = false;
639             state.empty = false;
640             if ( isDocumentState() )
641                 printer.flush();
642         }
643     }
644 
645 
646     protected String getEntityRef( int ch )
647     {
648         // Encode special XML characters into the equivalent character references.
649         // These five are defined by default for all XML documents.
650         switch ( ch ) {
651         case '<':
652             return "lt";
653         case '>':
654             return "gt";
655         case '"':
656             return "quot";
657         case '\'':
658             return "apos";
659         case '&':
660             return "amp";
661         }
662         return null;
663     }
664 
665 
666     /** Retrieve and remove the namespaces declarations from the list of attributes.
667      *
668      */
669     private Attributes extractNamespaces( Attributes attrs )
670         throws SAXException
671     {
672         AttributesImpl attrsOnly;
673         String         rawName;
674         int            i;
675         int            length;
676 
677         length = attrs.getLength();
678         attrsOnly = new AttributesImpl( attrs );
679 
680         for ( i = length - 1 ; i >= 0 ; --i ) {
681             rawName = attrsOnly.getQName( i );
682 
683             //We have to exclude the namespaces declarations from the attributes
684             //Append only when the feature http://xml.org/sax/features/namespace-prefixes"
685             //is TRUE
686             if ( rawName.startsWith( "xmlns" ) ) {
687                 if (rawName.length() == 5) {
688                     startPrefixMapping( "", attrs.getValue( i ) );
689                     attrsOnly.removeAttribute( i );
690                 } else if (rawName.charAt(5) == ':') {
691                     startPrefixMapping(rawName.substring(6), attrs.getValue(i));
692                     attrsOnly.removeAttribute( i );
693                 }
694             }
695         }
696         return attrsOnly;
697     }
698 }