001 /**
002 *
003 * Copyright 2006 The Apache Software Foundation
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 /*
019 * This code has been borrowed from the Apache Xerces project. We're copying the code to
020 * keep from adding a dependency on Xerces in the Geronimo kernel.
021 */
022
023 package org.apache.geronimo.system.configuration;
024
025 import java.io.IOException;
026 import java.io.OutputStream;
027 import java.io.Writer;
028 import java.util.Enumeration;
029
030 import org.w3c.dom.Attr;
031 import org.w3c.dom.Element;
032 import org.w3c.dom.NamedNodeMap;
033 import org.w3c.dom.Node;
034 import org.xml.sax.AttributeList;
035 import org.xml.sax.Attributes;
036 import org.xml.sax.SAXException;
037 import org.xml.sax.helpers.AttributesImpl;
038
039
040 /**
041 * Implements an XML serializer supporting both DOM and SAX pretty
042 * serializing. For usage instructions see {@link Serializer}.
043 * <p>
044 * If an output stream is used, the encoding is taken from the
045 * output format (defaults to <tt>UTF-8</tt>). If a writer is
046 * used, make sure the writer uses the same encoding (if applies)
047 * as specified in the output format.
048 * <p>
049 * The serializer supports both DOM and SAX. DOM serializing is done
050 * by calling {@link #serialize} and SAX serializing is done by firing
051 * SAX events and using the serializer as a document handler.
052 * <p>
053 * If an I/O exception occurs while serializing, the serializer
054 * will not throw an exception directly, but only throw it
055 * at the end of serializing (either DOM or SAX's {@link
056 * org.xml.sax.DocumentHandler#endDocument}).
057 * <p>
058 * For elements that are not specified as whitespace preserving,
059 * the serializer will potentially break long text lines at space
060 * boundaries, indent lines, and serialize elements on separate
061 * lines. Line terminators will be regarded as spaces, and
062 * spaces at beginning of line will be stripped.
063 *
064 *
065 * @version $Revision: 410741 $ $Date: 2006-05-31 21:35:48 -0700 (Wed, 31 May 2006) $
066 * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
067 * @see Serializer
068 */
069 public class XMLSerializer extends BaseMarkupSerializer
070 {
071
/**
 * Creates a serializer backed by a freshly built XML output format.
 * No output target is attached here, so the serializer is unusable
 * until {@link #setOutputCharStream} or {@link #setOutputByteStream}
 * has been called.
 */
public XMLSerializer()
{
    super( new OutputFormat( Method.XML, null, false ) );
}
081
082
/**
 * Creates a serializer that uses the supplied output format, or a
 * freshly built XML output format when <tt>format</tt> is null.
 * The method of the format in use is set to XML. No output target is
 * attached here, so the serializer is unusable until
 * {@link #setOutputCharStream} or {@link #setOutputByteStream} has
 * been called.
 *
 * @param format The output format to use, null for the default
 */
public XMLSerializer( OutputFormat format )
{
    super( format == null ? new OutputFormat( Method.XML, null, false ) : format );
    // 'this.format' is the inherited field, not the (possibly null) parameter.
    this.format.setMethod( Method.XML );
}
093
094
/**
 * Creates a serializer that writes characters to <tt>writer</tt>
 * using the supplied output format, or a freshly built XML output
 * format when <tt>format</tt> is null. The method of the format in
 * use is set to XML.
 *
 * @param writer The writer to use
 * @param format The output format to use, null for the default
 */
public XMLSerializer( Writer writer, OutputFormat format )
{
    super( format == null ? new OutputFormat( Method.XML, null, false ) : format );
    // 'this.format' is the inherited field, not the (possibly null) parameter.
    this.format.setMethod( Method.XML );
    setOutputCharStream( writer );
}
109
110
/**
 * Creates a serializer that writes bytes to <tt>output</tt> using
 * the supplied output format, or a freshly built XML output format
 * when <tt>format</tt> is null. The method of the format in use is
 * set to XML.
 *
 * @param output The output stream to use
 * @param format The output format to use, null for the default
 */
public XMLSerializer( OutputStream output, OutputFormat format )
{
    super( format == null ? new OutputFormat( Method.XML, null, false ) : format );
    // 'this.format' is the inherited field, not the (possibly null) parameter.
    this.format.setMethod( Method.XML );
    setOutputByteStream( output );
}
125
126
127 public void setOutputFormat( OutputFormat format )
128 {
129 super.setOutputFormat( format != null ? format : new OutputFormat( Method.XML, null, false ) );
130 }
131
132
133 //-----------------------------------------//
134 // SAX content handler serializing methods //
135 //-----------------------------------------//
136
137
138 public void startElement( String namespaceURI, String localName,
139 String rawName, Attributes attrs )
140 throws SAXException
141 {
142 int i;
143 boolean preserveSpace;
144 ElementState state;
145 String name;
146 String value;
147
148 try {
149 if ( printer == null )
150 throw new IllegalStateException( "SER002 No writer supplied for serializer" );
151
152 state = getElementState();
153 if ( isDocumentState() ) {
154 // If this is the root element handle it differently.
155 // If the first root element in the document, serialize
156 // the document's DOCTYPE. Space preserving defaults
157 // to that of the output format.
158 if ( ! started )
159 startDocument( ( localName == null || localName.length() == 0 ) ? rawName : localName );
160 } else {
161 // For any other element, if first in parent, then
162 // close parent's opening tag and use the parnet's
163 // space preserving.
164 if ( state.empty )
165 printer.printText( '>' );
166 // Must leave CData section first
167 if ( state.inCData )
168 {
169 printer.printText( "]]>" );
170 state.inCData = false;
171 }
172 // Indent this element on a new line if the first
173 // content of the parent element or immediately
174 // following an element or a comment
175 if ( indenting && ! state.preserveSpace &&
176 ( state.empty || state.afterElement || state.afterComment) )
177 printer.breakLine();
178 }
179 preserveSpace = state.preserveSpace;
180
181 //We remove the namespaces from the attributes list so that they will
182 //be in _prefixes
183 attrs = extractNamespaces(attrs);
184
185 // Do not change the current element state yet.
186 // This only happens in endElement().
187 if ( rawName == null || rawName.length() == 0 ) {
188 if ( localName == null )
189 throw new SAXException( "No rawName and localName is null" );
190 if ( namespaceURI != null && ! namespaceURI.equals( "" ) ) {
191 String prefix;
192 prefix = getPrefix( namespaceURI );
193 if ( prefix != null && prefix.length() > 0 )
194 rawName = prefix + ":" + localName;
195 else
196 rawName = localName;
197 } else
198 rawName = localName;
199 }
200
201 printer.printText( '<' );
202 printer.printText( rawName );
203 printer.indent();
204
205 // For each attribute print it's name and value as one part,
206 // separated with a space so the element can be broken on
207 // multiple lines.
208 if ( attrs != null ) {
209 for ( i = 0 ; i < attrs.getLength() ; ++i ) {
210 printer.printSpace();
211
212 name = attrs.getQName( i );
213 if ( name != null && name.length() == 0 ) {
214 String prefix;
215 String attrURI;
216
217 name = attrs.getLocalName( i );
218 attrURI = attrs.getURI( i );
219 if ( ( attrURI != null && attrURI.length() != 0 ) &&
220 ( namespaceURI == null || namespaceURI.length() == 0 ||
221 ! attrURI.equals( namespaceURI ) ) ) {
222 prefix = getPrefix( attrURI );
223 if ( prefix != null && prefix.length() > 0 )
224 name = prefix + ":" + name;
225 }
226 }
227
228 value = attrs.getValue( i );
229 if ( value == null )
230 value = "";
231 printer.printText( name );
232 printer.printText( "=\"" );
233 printEscaped( value );
234 printer.printText( '"' );
235
236 // If the attribute xml:space exists, determine whether
237 // to preserve spaces in this and child nodes based on
238 // its value.
239 if ( name.equals( "xml:space" ) ) {
240 if ( value.equals( "preserve" ) )
241 preserveSpace = true;
242 else
243 preserveSpace = format.getPreserveSpace();
244 }
245 }
246 }
247
248 if ( prefixes != null ) {
249 Enumeration keyEnum;
250
251 keyEnum = prefixes.keys();
252 while ( keyEnum.hasMoreElements() ) {
253 printer.printSpace();
254 value = (String) keyEnum.nextElement();
255 name = (String) prefixes.get( value );
256 if ( name.length() == 0 ) {
257 printer.printText( "xmlns=\"" );
258 printEscaped( value );
259 printer.printText( '"' );
260 } else {
261 printer.printText( "xmlns:" );
262 printer.printText( name );
263 printer.printText( "=\"" );
264 printEscaped( value );
265 printer.printText( '"' );
266 }
267 }
268 }
269
270 // Now it's time to enter a new element state
271 // with the tag name and space preserving.
272 // We still do not change the curent element state.
273 state = enterElementState( namespaceURI, localName, rawName, preserveSpace );
274 name = ( localName == null || localName.length() == 0 ) ? rawName : namespaceURI + "^" + localName;
275 state.doCData = format.isCDataElement( name );
276 state.unescaped = format.isNonEscapingElement( name );
277 } catch ( IOException except ) {
278 throw new SAXException( except );
279 }
280 }
281
282
283 public void endElement( String namespaceURI, String localName,
284 String rawName )
285 throws SAXException
286 {
287 try {
288 endElementIO( namespaceURI, localName, rawName );
289 } catch ( IOException except ) {
290 throw new SAXException( except );
291 }
292 }
293
294
295 public void endElementIO( String namespaceURI, String localName,
296 String rawName )
297 throws IOException
298 {
299 ElementState state;
300
301 // Works much like content() with additions for closing
302 // an element. Note the different checks for the closed
303 // element's state and the parent element's state.
304 printer.unindent();
305 state = getElementState();
306 if ( state.empty ) {
307 printer.printText( "/>" );
308 } else {
309 // Must leave CData section first
310 if ( state.inCData )
311 printer.printText( "]]>" );
312 // This element is not empty and that last content was
313 // another element, so print a line break before that
314 // last element and this element's closing tag.
315 if ( indenting && ! state.preserveSpace && (state.afterElement || state.afterComment) )
316 printer.breakLine();
317 printer.printText( "</" );
318 printer.printText( state.rawName );
319 printer.printText( '>' );
320 }
321 // Leave the element state and update that of the parent
322 // (if we're not root) to not empty and after element.
323 state = leaveElementState();
324 state.afterElement = true;
325 state.afterComment = false;
326 state.empty = false;
327 if ( isDocumentState() )
328 printer.flush();
329 }
330
331
332 //------------------------------------------//
333 // SAX document handler serializing methods //
334 //------------------------------------------//
335
336
337 public void startElement( String tagName, AttributeList attrs )
338 throws SAXException
339 {
340 int i;
341 boolean preserveSpace;
342 ElementState state;
343 String name;
344 String value;
345
346 try {
347 if ( printer == null )
348 throw new IllegalStateException( "SER002 No writer supplied for serializer" );
349
350 state = getElementState();
351 if ( isDocumentState() ) {
352 // If this is the root element handle it differently.
353 // If the first root element in the document, serialize
354 // the document's DOCTYPE. Space preserving defaults
355 // to that of the output format.
356 if ( ! started )
357 startDocument( tagName );
358 } else {
359 // For any other element, if first in parent, then
360 // close parent's opening tag and use the parnet's
361 // space preserving.
362 if ( state.empty )
363 printer.printText( '>' );
364 // Must leave CData section first
365 if ( state.inCData )
366 {
367 printer.printText( "]]>" );
368 state.inCData = false;
369 }
370 // Indent this element on a new line if the first
371 // content of the parent element or immediately
372 // following an element.
373 if ( indenting && ! state.preserveSpace &&
374 ( state.empty || state.afterElement || state.afterComment) )
375 printer.breakLine();
376 }
377 preserveSpace = state.preserveSpace;
378
379 // Do not change the current element state yet.
380 // This only happens in endElement().
381
382 printer.printText( '<' );
383 printer.printText( tagName );
384 printer.indent();
385
386 // For each attribute print it's name and value as one part,
387 // separated with a space so the element can be broken on
388 // multiple lines.
389 if ( attrs != null ) {
390 for ( i = 0 ; i < attrs.getLength() ; ++i ) {
391 printer.printSpace();
392 name = attrs.getName( i );
393 value = attrs.getValue( i );
394 if ( value != null ) {
395 printer.printText( name );
396 printer.printText( "=\"" );
397 printEscaped( value );
398 printer.printText( '"' );
399 }
400
401 // If the attribute xml:space exists, determine whether
402 // to preserve spaces in this and child nodes based on
403 // its value.
404 if ( name.equals( "xml:space" ) ) {
405 if ( value.equals( "preserve" ) )
406 preserveSpace = true;
407 else
408 preserveSpace = format.getPreserveSpace();
409 }
410 }
411 }
412 // Now it's time to enter a new element state
413 // with the tag name and space preserving.
414 // We still do not change the curent element state.
415 state = enterElementState( null, null, tagName, preserveSpace );
416 state.doCData = format.isCDataElement( tagName );
417 state.unescaped = format.isNonEscapingElement( tagName );
418 } catch ( IOException except ) {
419 throw new SAXException( except );
420 }
421
422 }
423
424
425 public void endElement( String tagName )
426 throws SAXException
427 {
428 endElement( null, null, tagName );
429 }
430
431
432
433 //----------------------------------//
434 // Generic node serializing methods //
435 //----------------------------------//
436
437
438 /**
439 * Called to serialize the document's DOCTYPE by the root element.
440 * The document type declaration must name the root element,
441 * but the root element is only known when that element is serialized,
442 * and not at the start of the document.
443 * <p>
444 * This method will check if it has not been called before ({@link #started}),
445 * will serialize the document type declaration, and will serialize all
446 * pre-root comments and PIs that were accumulated in the document
447 * (see {@link #serializePreRoot}). Pre-root will be serialized even if
448 * this is not the first root element of the document.
449 */
450 protected void startDocument( String rootTagName )
451 throws IOException
452 {
453 int i;
454 String dtd;
455
456 dtd = printer.leaveDTD();
457 if ( ! started ) {
458
459 if ( ! format.getOmitXMLDeclaration() ) {
460 StringBuffer buffer;
461
462 // Serialize the document declaration appreaing at the head
463 // of very XML document (unless asked not to).
464 buffer = new StringBuffer( "<?xml version=\"" );
465 if ( format.getVersion() != null )
466 buffer.append( format.getVersion() );
467 else
468 buffer.append( "1.0" );
469 buffer.append( '"' );
470 if ( format.getEncoding() != null ) {
471 buffer.append( " encoding=\"" );
472 buffer.append( format.getEncoding() );
473 buffer.append( '"' );
474 }
475 if ( format.getStandalone() && docTypeSystemId == null &&
476 docTypePublicId == null )
477 buffer.append( " standalone=\"yes\"" );
478 buffer.append( "?>" );
479 printer.printText( buffer );
480 printer.breakLine();
481 }
482
483 if ( ! format.getOmitDocumentType() ) {
484 if ( docTypeSystemId != null ) {
485 // System identifier must be specified to print DOCTYPE.
486 // If public identifier is specified print 'PUBLIC
487 // <public> <system>', if not, print 'SYSTEM <system>'.
488 printer.printText( "<!DOCTYPE " );
489 printer.printText( rootTagName );
490 if ( docTypePublicId != null ) {
491 printer.printText( " PUBLIC " );
492 printDoctypeURL( docTypePublicId );
493 if ( indenting ) {
494 printer.breakLine();
495 for ( i = 0 ; i < 18 + rootTagName.length() ; ++i )
496 printer.printText( " " );
497 } else
498 printer.printText( " " );
499 printDoctypeURL( docTypeSystemId );
500 }
501 else {
502 printer.printText( " SYSTEM " );
503 printDoctypeURL( docTypeSystemId );
504 }
505
506 // If we accumulated any DTD contents while printing.
507 // this would be the place to print it.
508 if ( dtd != null && dtd.length() > 0 ) {
509 printer.printText( " [" );
510 printText( dtd, true, true );
511 printer.printText( ']' );
512 }
513
514 printer.printText( ">" );
515 printer.breakLine();
516 } else if ( dtd != null && dtd.length() > 0 ) {
517 printer.printText( "<!DOCTYPE " );
518 printer.printText( rootTagName );
519 printer.printText( " [" );
520 printText( dtd, true, true );
521 printer.printText( "]>" );
522 printer.breakLine();
523 }
524 }
525 }
526 started = true;
527 // Always serialize these, even if not te first root element.
528 serializePreRoot();
529 }
530
531
532 /**
533 * Called to serialize a DOM element. Equivalent to calling {@link
534 * #startElement}, {@link #endElement} and serializing everything
535 * inbetween, but better optimized.
536 */
537 protected void serializeElement( Element elem )
538 throws IOException
539 {
540 Attr attr;
541 NamedNodeMap attrMap;
542 int i;
543 Node child;
544 ElementState state;
545 boolean preserveSpace;
546 String name;
547 String value;
548 String tagName;
549
550 tagName = elem.getTagName();
551 state = getElementState();
552 if ( isDocumentState() ) {
553 // If this is the root element handle it differently.
554 // If the first root element in the document, serialize
555 // the document's DOCTYPE. Space preserving defaults
556 // to that of the output format.
557 if ( ! started )
558 startDocument( tagName );
559 } else {
560 // For any other element, if first in parent, then
561 // close parent's opening tag and use the parnet's
562 // space preserving.
563 if ( state.empty )
564 printer.printText( '>' );
565 // Must leave CData section first
566 if ( state.inCData )
567 {
568 printer.printText( "]]>" );
569 state.inCData = false;
570 }
571 // Indent this element on a new line if the first
572 // content of the parent element or immediately
573 // following an element.
574 if ( indenting && ! state.preserveSpace &&
575 ( state.empty || state.afterElement || state.afterComment) )
576 printer.breakLine();
577 }
578 preserveSpace = state.preserveSpace;
579
580 // Do not change the current element state yet.
581 // This only happens in endElement().
582
583 printer.printText( '<' );
584 printer.printText( tagName );
585 printer.indent();
586
587 // Lookup the element's attribute, but only print specified
588 // attributes. (Unspecified attributes are derived from the DTD.
589 // For each attribute print it's name and value as one part,
590 // separated with a space so the element can be broken on
591 // multiple lines.
592 attrMap = elem.getAttributes();
593 if ( attrMap != null ) {
594 for ( i = 0 ; i < attrMap.getLength() ; ++i ) {
595 attr = (Attr) attrMap.item( i );
596 name = attr.getName();
597 value = attr.getValue();
598 if ( value == null )
599 value = "";
600 if ( attr.getSpecified() ) {
601 printer.printSpace();
602 printer.printText( name );
603 printer.printText( "=\"" );
604 printEscaped( value );
605 printer.printText( '"' );
606 }
607 // If the attribute xml:space exists, determine whether
608 // to preserve spaces in this and child nodes based on
609 // its value.
610 if ( name.equals( "xml:space" ) ) {
611 if ( value.equals( "preserve" ) )
612 preserveSpace = true;
613 else
614 preserveSpace = format.getPreserveSpace();
615 }
616 }
617 }
618
619 // If element has children, then serialize them, otherwise
620 // serialize en empty tag.
621 if ( elem.hasChildNodes() ) {
622 // Enter an element state, and serialize the children
623 // one by one. Finally, end the element.
624 state = enterElementState( null, null, tagName, preserveSpace );
625 state.doCData = format.isCDataElement( tagName );
626 state.unescaped = format.isNonEscapingElement( tagName );
627 child = elem.getFirstChild();
628 while ( child != null ) {
629 serializeNode( child );
630 child = child.getNextSibling();
631 }
632 endElementIO( null, null, tagName );
633 } else {
634 printer.unindent();
635 printer.printText( "/>" );
636 // After element but parent element is no longer empty.
637 state.afterElement = true;
638 state.afterComment = false;
639 state.empty = false;
640 if ( isDocumentState() )
641 printer.flush();
642 }
643 }
644
645
646 protected String getEntityRef( int ch )
647 {
648 // Encode special XML characters into the equivalent character references.
649 // These five are defined by default for all XML documents.
650 switch ( ch ) {
651 case '<':
652 return "lt";
653 case '>':
654 return "gt";
655 case '"':
656 return "quot";
657 case '\'':
658 return "apos";
659 case '&':
660 return "amp";
661 }
662 return null;
663 }
664
665
666 /** Retrieve and remove the namespaces declarations from the list of attributes.
667 *
668 */
669 private Attributes extractNamespaces( Attributes attrs )
670 throws SAXException
671 {
672 AttributesImpl attrsOnly;
673 String rawName;
674 int i;
675 int length;
676
677 length = attrs.getLength();
678 attrsOnly = new AttributesImpl( attrs );
679
680 for ( i = length - 1 ; i >= 0 ; --i ) {
681 rawName = attrsOnly.getQName( i );
682
683 //We have to exclude the namespaces declarations from the attributes
684 //Append only when the feature http://xml.org/sax/features/namespace-prefixes"
685 //is TRUE
686 if ( rawName.startsWith( "xmlns" ) ) {
687 if (rawName.length() == 5) {
688 startPrefixMapping( "", attrs.getValue( i ) );
689 attrsOnly.removeAttribute( i );
690 } else if (rawName.charAt(5) == ':') {
691 startPrefixMapping(rawName.substring(6), attrs.getValue(i));
692 attrsOnly.removeAttribute( i );
693 }
694 }
695 }
696 return attrsOnly;
697 }
698 }