001 /** 002 * 003 * Copyright 2006 The Apache Software Foundation 004 * 005 * Licensed under the Apache License, Version 2.0 (the "License"); 006 * you may not use this file except in compliance with the License. 007 * You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 /* 019 * This code has been borrowed from the Apache Xerces project. We're copying the code to 020 * keep from adding a dependency on Xerces in the Geronimo kernel. 021 */ 022 023 package org.apache.geronimo.system.configuration; 024 025 import java.io.IOException; 026 import java.io.OutputStream; 027 import java.io.Writer; 028 import java.util.Enumeration; 029 030 import org.w3c.dom.Attr; 031 import org.w3c.dom.Element; 032 import org.w3c.dom.NamedNodeMap; 033 import org.w3c.dom.Node; 034 import org.xml.sax.AttributeList; 035 import org.xml.sax.Attributes; 036 import org.xml.sax.SAXException; 037 import org.xml.sax.helpers.AttributesImpl; 038 039 040 /** 041 * Implements an XML serializer supporting both DOM and SAX pretty 042 * serializing. For usage instructions see {@link Serializer}. 043 * <p> 044 * If an output stream is used, the encoding is taken from the 045 * output format (defaults to <tt>UTF-8</tt>). If a writer is 046 * used, make sure the writer uses the same encoding (if applies) 047 * as specified in the output format. 048 * <p> 049 * The serializer supports both DOM and SAX. DOM serializing is done 050 * by calling {@link #serialize} and SAX serializing is done by firing 051 * SAX events and using the serializer as a document handler. 052 * <p> 053 * If an I/O exception occurs while serializing, the serializer 054 * will not throw an exception directly, but only throw it 055 * at the end of serializing (either DOM or SAX's {@link 056 * org.xml.sax.DocumentHandler#endDocument}. 057 * <p> 058 * For elements that are not specified as whitespace preserving, 059 * the serializer will potentially break long text lines at space 060 * boundaries, indent lines, and serialize elements on separate 061 * lines. Line terminators will be regarded as spaces, and 062 * spaces at beginning of line will be stripped. 063 * 064 * 065 * @version $Revision: 410741 $ $Date: 2006-05-31 21:35:48 -0700 (Wed, 31 May 2006) $ 066 * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a> 067 * @see Serializer 068 */ 069 public class XMLSerializer extends BaseMarkupSerializer 070 { 071 072 /** 073 * Constructs a new serializer. The serializer cannot be used without 074 * calling {@link #setOutputCharStream} or {@link #setOutputByteStream} 075 * first. 076 */ 077 public XMLSerializer() 078 { 079 super( new OutputFormat( Method.XML, null, false ) ); 080 } 081 082 083 /** 084 * Constructs a new serializer. The serializer cannot be used without 085 * calling {@link #setOutputCharStream} or {@link #setOutputByteStream} 086 * first. 087 */ 088 public XMLSerializer( OutputFormat format ) 089 { 090 super( format != null ? format : new OutputFormat( Method.XML, null, false ) ); 091 this.format.setMethod( Method.XML ); 092 } 093 094 095 /** 096 * Constructs a new serializer that writes to the specified writer 097 * using the specified output format. If <tt>format</tt> is null, 098 * will use a default output format. 099 * 100 * @param writer The writer to use 101 * @param format The output format to use, null for the default 102 */ 103 public XMLSerializer( Writer writer, OutputFormat format ) 104 { 105 super( format != null ? format : new OutputFormat( Method.XML, null, false ) ); 106 this.format.setMethod( Method.XML ); 107 setOutputCharStream( writer ); 108 } 109 110 111 /** 112 * Constructs a new serializer that writes to the specified output 113 * stream using the specified output format. If <tt>format</tt> 114 * is null, will use a default output format. 115 * 116 * @param output The output stream to use 117 * @param format The output format to use, null for the default 118 */ 119 public XMLSerializer( OutputStream output, OutputFormat format ) 120 { 121 super( format != null ? format : new OutputFormat( Method.XML, null, false ) ); 122 this.format.setMethod( Method.XML ); 123 setOutputByteStream( output ); 124 } 125 126 127 public void setOutputFormat( OutputFormat format ) 128 { 129 super.setOutputFormat( format != null ? format : new OutputFormat( Method.XML, null, false ) ); 130 } 131 132 133 //-----------------------------------------// 134 // SAX content handler serializing methods // 135 //-----------------------------------------// 136 137 138 public void startElement( String namespaceURI, String localName, 139 String rawName, Attributes attrs ) 140 throws SAXException 141 { 142 int i; 143 boolean preserveSpace; 144 ElementState state; 145 String name; 146 String value; 147 148 try { 149 if ( printer == null ) 150 throw new IllegalStateException( "SER002 No writer supplied for serializer" ); 151 152 state = getElementState(); 153 if ( isDocumentState() ) { 154 // If this is the root element handle it differently. 155 // If the first root element in the document, serialize 156 // the document's DOCTYPE. Space preserving defaults 157 // to that of the output format. 158 if ( ! started ) 159 startDocument( ( localName == null || localName.length() == 0 ) ? rawName : localName ); 160 } else { 161 // For any other element, if first in parent, then 162 // close parent's opening tag and use the parnet's 163 // space preserving. 164 if ( state.empty ) 165 printer.printText( '>' ); 166 // Must leave CData section first 167 if ( state.inCData ) 168 { 169 printer.printText( "]]>" ); 170 state.inCData = false; 171 } 172 // Indent this element on a new line if the first 173 // content of the parent element or immediately 174 // following an element or a comment 175 if ( indenting && ! state.preserveSpace && 176 ( state.empty || state.afterElement || state.afterComment) ) 177 printer.breakLine(); 178 } 179 preserveSpace = state.preserveSpace; 180 181 //We remove the namespaces from the attributes list so that they will 182 //be in _prefixes 183 attrs = extractNamespaces(attrs); 184 185 // Do not change the current element state yet. 186 // This only happens in endElement(). 187 if ( rawName == null || rawName.length() == 0 ) { 188 if ( localName == null ) 189 throw new SAXException( "No rawName and localName is null" ); 190 if ( namespaceURI != null && ! namespaceURI.equals( "" ) ) { 191 String prefix; 192 prefix = getPrefix( namespaceURI ); 193 if ( prefix != null && prefix.length() > 0 ) 194 rawName = prefix + ":" + localName; 195 else 196 rawName = localName; 197 } else 198 rawName = localName; 199 } 200 201 printer.printText( '<' ); 202 printer.printText( rawName ); 203 printer.indent(); 204 205 // For each attribute print it's name and value as one part, 206 // separated with a space so the element can be broken on 207 // multiple lines. 208 if ( attrs != null ) { 209 for ( i = 0 ; i < attrs.getLength() ; ++i ) { 210 printer.printSpace(); 211 212 name = attrs.getQName( i ); 213 if ( name != null && name.length() == 0 ) { 214 String prefix; 215 String attrURI; 216 217 name = attrs.getLocalName( i ); 218 attrURI = attrs.getURI( i ); 219 if ( ( attrURI != null && attrURI.length() != 0 ) && 220 ( namespaceURI == null || namespaceURI.length() == 0 || 221 ! attrURI.equals( namespaceURI ) ) ) { 222 prefix = getPrefix( attrURI ); 223 if ( prefix != null && prefix.length() > 0 ) 224 name = prefix + ":" + name; 225 } 226 } 227 228 value = attrs.getValue( i ); 229 if ( value == null ) 230 value = ""; 231 printer.printText( name ); 232 printer.printText( "=\"" ); 233 printEscaped( value ); 234 printer.printText( '"' ); 235 236 // If the attribute xml:space exists, determine whether 237 // to preserve spaces in this and child nodes based on 238 // its value. 239 if ( name.equals( "xml:space" ) ) { 240 if ( value.equals( "preserve" ) ) 241 preserveSpace = true; 242 else 243 preserveSpace = format.getPreserveSpace(); 244 } 245 } 246 } 247 248 if ( prefixes != null ) { 249 Enumeration keyEnum; 250 251 keyEnum = prefixes.keys(); 252 while ( keyEnum.hasMoreElements() ) { 253 printer.printSpace(); 254 value = (String) keyEnum.nextElement(); 255 name = (String) prefixes.get( value ); 256 if ( name.length() == 0 ) { 257 printer.printText( "xmlns=\"" ); 258 printEscaped( value ); 259 printer.printText( '"' ); 260 } else { 261 printer.printText( "xmlns:" ); 262 printer.printText( name ); 263 printer.printText( "=\"" ); 264 printEscaped( value ); 265 printer.printText( '"' ); 266 } 267 } 268 } 269 270 // Now it's time to enter a new element state 271 // with the tag name and space preserving. 272 // We still do not change the curent element state. 273 state = enterElementState( namespaceURI, localName, rawName, preserveSpace ); 274 name = ( localName == null || localName.length() == 0 ) ? rawName : namespaceURI + "^" + localName; 275 state.doCData = format.isCDataElement( name ); 276 state.unescaped = format.isNonEscapingElement( name ); 277 } catch ( IOException except ) { 278 throw new SAXException( except ); 279 } 280 } 281 282 283 public void endElement( String namespaceURI, String localName, 284 String rawName ) 285 throws SAXException 286 { 287 try { 288 endElementIO( namespaceURI, localName, rawName ); 289 } catch ( IOException except ) { 290 throw new SAXException( except ); 291 } 292 } 293 294 295 public void endElementIO( String namespaceURI, String localName, 296 String rawName ) 297 throws IOException 298 { 299 ElementState state; 300 301 // Works much like content() with additions for closing 302 // an element. Note the different checks for the closed 303 // element's state and the parent element's state. 304 printer.unindent(); 305 state = getElementState(); 306 if ( state.empty ) { 307 printer.printText( "/>" ); 308 } else { 309 // Must leave CData section first 310 if ( state.inCData ) 311 printer.printText( "]]>" ); 312 // This element is not empty and that last content was 313 // another element, so print a line break before that 314 // last element and this element's closing tag. 315 if ( indenting && ! state.preserveSpace && (state.afterElement || state.afterComment) ) 316 printer.breakLine(); 317 printer.printText( "</" ); 318 printer.printText( state.rawName ); 319 printer.printText( '>' ); 320 } 321 // Leave the element state and update that of the parent 322 // (if we're not root) to not empty and after element. 323 state = leaveElementState(); 324 state.afterElement = true; 325 state.afterComment = false; 326 state.empty = false; 327 if ( isDocumentState() ) 328 printer.flush(); 329 } 330 331 332 //------------------------------------------// 333 // SAX document handler serializing methods // 334 //------------------------------------------// 335 336 337 public void startElement( String tagName, AttributeList attrs ) 338 throws SAXException 339 { 340 int i; 341 boolean preserveSpace; 342 ElementState state; 343 String name; 344 String value; 345 346 try { 347 if ( printer == null ) 348 throw new IllegalStateException( "SER002 No writer supplied for serializer" ); 349 350 state = getElementState(); 351 if ( isDocumentState() ) { 352 // If this is the root element handle it differently. 353 // If the first root element in the document, serialize 354 // the document's DOCTYPE. Space preserving defaults 355 // to that of the output format. 356 if ( ! started ) 357 startDocument( tagName ); 358 } else { 359 // For any other element, if first in parent, then 360 // close parent's opening tag and use the parnet's 361 // space preserving. 362 if ( state.empty ) 363 printer.printText( '>' ); 364 // Must leave CData section first 365 if ( state.inCData ) 366 { 367 printer.printText( "]]>" ); 368 state.inCData = false; 369 } 370 // Indent this element on a new line if the first 371 // content of the parent element or immediately 372 // following an element. 373 if ( indenting && ! state.preserveSpace && 374 ( state.empty || state.afterElement || state.afterComment) ) 375 printer.breakLine(); 376 } 377 preserveSpace = state.preserveSpace; 378 379 // Do not change the current element state yet. 380 // This only happens in endElement(). 381 382 printer.printText( '<' ); 383 printer.printText( tagName ); 384 printer.indent(); 385 386 // For each attribute print it's name and value as one part, 387 // separated with a space so the element can be broken on 388 // multiple lines. 389 if ( attrs != null ) { 390 for ( i = 0 ; i < attrs.getLength() ; ++i ) { 391 printer.printSpace(); 392 name = attrs.getName( i ); 393 value = attrs.getValue( i ); 394 if ( value != null ) { 395 printer.printText( name ); 396 printer.printText( "=\"" ); 397 printEscaped( value ); 398 printer.printText( '"' ); 399 } 400 401 // If the attribute xml:space exists, determine whether 402 // to preserve spaces in this and child nodes based on 403 // its value. 404 if ( name.equals( "xml:space" ) ) { 405 if ( value.equals( "preserve" ) ) 406 preserveSpace = true; 407 else 408 preserveSpace = format.getPreserveSpace(); 409 } 410 } 411 } 412 // Now it's time to enter a new element state 413 // with the tag name and space preserving. 414 // We still do not change the curent element state. 415 state = enterElementState( null, null, tagName, preserveSpace ); 416 state.doCData = format.isCDataElement( tagName ); 417 state.unescaped = format.isNonEscapingElement( tagName ); 418 } catch ( IOException except ) { 419 throw new SAXException( except ); 420 } 421 422 } 423 424 425 public void endElement( String tagName ) 426 throws SAXException 427 { 428 endElement( null, null, tagName ); 429 } 430 431 432 433 //------------------------------------------// 434 // Generic node serializing methods methods // 435 //------------------------------------------// 436 437 438 /** 439 * Called to serialize the document's DOCTYPE by the root element. 440 * The document type declaration must name the root element, 441 * but the root element is only known when that element is serialized, 442 * and not at the start of the document. 443 * <p> 444 * This method will check if it has not been called before ({@link #started}), 445 * will serialize the document type declaration, and will serialize all 446 * pre-root comments and PIs that were accumulated in the document 447 * (see {@link #serializePreRoot}). Pre-root will be serialized even if 448 * this is not the first root element of the document. 449 */ 450 protected void startDocument( String rootTagName ) 451 throws IOException 452 { 453 int i; 454 String dtd; 455 456 dtd = printer.leaveDTD(); 457 if ( ! started ) { 458 459 if ( ! format.getOmitXMLDeclaration() ) { 460 StringBuffer buffer; 461 462 // Serialize the document declaration appreaing at the head 463 // of very XML document (unless asked not to). 464 buffer = new StringBuffer( "<?xml version=\"" ); 465 if ( format.getVersion() != null ) 466 buffer.append( format.getVersion() ); 467 else 468 buffer.append( "1.0" ); 469 buffer.append( '"' ); 470 if ( format.getEncoding() != null ) { 471 buffer.append( " encoding=\"" ); 472 buffer.append( format.getEncoding() ); 473 buffer.append( '"' ); 474 } 475 if ( format.getStandalone() && docTypeSystemId == null && 476 docTypePublicId == null ) 477 buffer.append( " standalone=\"yes\"" ); 478 buffer.append( "?>" ); 479 printer.printText( buffer ); 480 printer.breakLine(); 481 } 482 483 if ( ! format.getOmitDocumentType() ) { 484 if ( docTypeSystemId != null ) { 485 // System identifier must be specified to print DOCTYPE. 486 // If public identifier is specified print 'PUBLIC 487 // <public> <system>', if not, print 'SYSTEM <system>'. 488 printer.printText( "<!DOCTYPE " ); 489 printer.printText( rootTagName ); 490 if ( docTypePublicId != null ) { 491 printer.printText( " PUBLIC " ); 492 printDoctypeURL( docTypePublicId ); 493 if ( indenting ) { 494 printer.breakLine(); 495 for ( i = 0 ; i < 18 + rootTagName.length() ; ++i ) 496 printer.printText( " " ); 497 } else 498 printer.printText( " " ); 499 printDoctypeURL( docTypeSystemId ); 500 } 501 else { 502 printer.printText( " SYSTEM " ); 503 printDoctypeURL( docTypeSystemId ); 504 } 505 506 // If we accumulated any DTD contents while printing. 507 // this would be the place to print it. 508 if ( dtd != null && dtd.length() > 0 ) { 509 printer.printText( " [" ); 510 printText( dtd, true, true ); 511 printer.printText( ']' ); 512 } 513 514 printer.printText( ">" ); 515 printer.breakLine(); 516 } else if ( dtd != null && dtd.length() > 0 ) { 517 printer.printText( "<!DOCTYPE " ); 518 printer.printText( rootTagName ); 519 printer.printText( " [" ); 520 printText( dtd, true, true ); 521 printer.printText( "]>" ); 522 printer.breakLine(); 523 } 524 } 525 } 526 started = true; 527 // Always serialize these, even if not te first root element. 528 serializePreRoot(); 529 } 530 531 532 /** 533 * Called to serialize a DOM element. Equivalent to calling {@link 534 * #startElement}, {@link #endElement} and serializing everything 535 * inbetween, but better optimized. 536 */ 537 protected void serializeElement( Element elem ) 538 throws IOException 539 { 540 Attr attr; 541 NamedNodeMap attrMap; 542 int i; 543 Node child; 544 ElementState state; 545 boolean preserveSpace; 546 String name; 547 String value; 548 String tagName; 549 550 tagName = elem.getTagName(); 551 state = getElementState(); 552 if ( isDocumentState() ) { 553 // If this is the root element handle it differently. 554 // If the first root element in the document, serialize 555 // the document's DOCTYPE. Space preserving defaults 556 // to that of the output format. 557 if ( ! started ) 558 startDocument( tagName ); 559 } else { 560 // For any other element, if first in parent, then 561 // close parent's opening tag and use the parnet's 562 // space preserving. 563 if ( state.empty ) 564 printer.printText( '>' ); 565 // Must leave CData section first 566 if ( state.inCData ) 567 { 568 printer.printText( "]]>" ); 569 state.inCData = false; 570 } 571 // Indent this element on a new line if the first 572 // content of the parent element or immediately 573 // following an element. 574 if ( indenting && ! state.preserveSpace && 575 ( state.empty || state.afterElement || state.afterComment) ) 576 printer.breakLine(); 577 } 578 preserveSpace = state.preserveSpace; 579 580 // Do not change the current element state yet. 581 // This only happens in endElement(). 582 583 printer.printText( '<' ); 584 printer.printText( tagName ); 585 printer.indent(); 586 587 // Lookup the element's attribute, but only print specified 588 // attributes. (Unspecified attributes are derived from the DTD. 589 // For each attribute print it's name and value as one part, 590 // separated with a space so the element can be broken on 591 // multiple lines. 592 attrMap = elem.getAttributes(); 593 if ( attrMap != null ) { 594 for ( i = 0 ; i < attrMap.getLength() ; ++i ) { 595 attr = (Attr) attrMap.item( i ); 596 name = attr.getName(); 597 value = attr.getValue(); 598 if ( value == null ) 599 value = ""; 600 if ( attr.getSpecified() ) { 601 printer.printSpace(); 602 printer.printText( name ); 603 printer.printText( "=\"" ); 604 printEscaped( value ); 605 printer.printText( '"' ); 606 } 607 // If the attribute xml:space exists, determine whether 608 // to preserve spaces in this and child nodes based on 609 // its value. 610 if ( name.equals( "xml:space" ) ) { 611 if ( value.equals( "preserve" ) ) 612 preserveSpace = true; 613 else 614 preserveSpace = format.getPreserveSpace(); 615 } 616 } 617 } 618 619 // If element has children, then serialize them, otherwise 620 // serialize en empty tag. 621 if ( elem.hasChildNodes() ) { 622 // Enter an element state, and serialize the children 623 // one by one. Finally, end the element. 624 state = enterElementState( null, null, tagName, preserveSpace ); 625 state.doCData = format.isCDataElement( tagName ); 626 state.unescaped = format.isNonEscapingElement( tagName ); 627 child = elem.getFirstChild(); 628 while ( child != null ) { 629 serializeNode( child ); 630 child = child.getNextSibling(); 631 } 632 endElementIO( null, null, tagName ); 633 } else { 634 printer.unindent(); 635 printer.printText( "/>" ); 636 // After element but parent element is no longer empty. 637 state.afterElement = true; 638 state.afterComment = false; 639 state.empty = false; 640 if ( isDocumentState() ) 641 printer.flush(); 642 } 643 } 644 645 646 protected String getEntityRef( int ch ) 647 { 648 // Encode special XML characters into the equivalent character references. 649 // These five are defined by default for all XML documents. 650 switch ( ch ) { 651 case '<': 652 return "lt"; 653 case '>': 654 return "gt"; 655 case '"': 656 return "quot"; 657 case '\'': 658 return "apos"; 659 case '&': 660 return "amp"; 661 } 662 return null; 663 } 664 665 666 /** Retrieve and remove the namespaces declarations from the list of attributes. 667 * 668 */ 669 private Attributes extractNamespaces( Attributes attrs ) 670 throws SAXException 671 { 672 AttributesImpl attrsOnly; 673 String rawName; 674 int i; 675 int length; 676 677 length = attrs.getLength(); 678 attrsOnly = new AttributesImpl( attrs ); 679 680 for ( i = length - 1 ; i >= 0 ; --i ) { 681 rawName = attrsOnly.getQName( i ); 682 683 //We have to exclude the namespaces declarations from the attributes 684 //Append only when the feature http://xml.org/sax/features/namespace-prefixes" 685 //is TRUE 686 if ( rawName.startsWith( "xmlns" ) ) { 687 if (rawName.length() == 5) { 688 startPrefixMapping( "", attrs.getValue( i ) ); 689 attrsOnly.removeAttribute( i ); 690 } else if (rawName.charAt(5) == ':') { 691 startPrefixMapping(rawName.substring(6), attrs.getValue(i)); 692 attrsOnly.removeAttribute( i ); 693 } 694 } 695 } 696 return attrsOnly; 697 } 698 }