001 /** 002 * 003 * Copyright 2006 The Apache Software Foundation 004 * 005 * Licensed under the Apache License, Version 2.0 (the "License"); 006 * you may not use this file except in compliance with the License. 007 * You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 /* 019 * This code has been borrowed from the Apache Xerces project. We're copying the code to 020 * keep from adding a dependency on Xerces in the Geronimo kernel. 021 */ 022 023 package org.apache.geronimo.system.configuration; 024 025 import java.io.Writer; 026 import java.io.OutputStream; 027 import java.io.IOException; 028 import java.util.Vector; 029 import java.util.Hashtable; 030 031 import org.w3c.dom.Document; 032 import org.w3c.dom.DocumentFragment; 033 import org.w3c.dom.DocumentType; 034 import org.w3c.dom.Element; 035 import org.w3c.dom.Node; 036 import org.xml.sax.Locator; 037 import org.xml.sax.SAXException; 038 039 /** 040 * Base class for a serializer supporting both DOM and SAX pretty 041 * serializing of XML/HTML/XHTML documents. Derives classes perform 042 * the method-specific serializing, this class provides the common 043 * serializing mechanisms. 044 * <p> 045 * The serializer must be initialized with the proper writer and 046 * output format before it can be used by calling {@link #init}. 047 * The serializer can be reused any number of times, but cannot 048 * be used concurrently by two threads. 049 * <p> 050 * If an output stream is used, the encoding is taken from the 051 * output format (defaults to <tt>UTF-8</tt>). 
If a writer is 052 * used, make sure the writer uses the same encoding (if applies) 053 * as specified in the output format. 054 * <p> 055 * The serializer supports both DOM and SAX. DOM serializing is done 056 * by calling {@link #serialize} and SAX serializing is done by firing 057 * SAX events and using the serializer as a document handler. 058 * This also applies to derived class. 059 * <p> 060 * If an I/O exception occurs while serializing, the serializer 061 * will not throw an exception directly, but only throw it 062 * at the end of serializing (either DOM or SAX's {@link 063 * org.xml.sax.DocumentHandler#endDocument}. 064 * <p> 065 * For elements that are not specified as whitespace preserving, 066 * the serializer will potentially break long text lines at space 067 * boundaries, indent lines, and serialize elements on separate 068 * lines. Line terminators will be regarded as spaces, and 069 * spaces at beginning of line will be stripped. 070 * <p> 071 * When indenting, the serializer is capable of detecting seemingly 072 * element content, and serializing these elements indented on separate 073 * lines. An element is serialized indented when it is the first or 074 * last child of an element, or immediate following or preceding 075 * another element. 076 * 077 * 078 * @version $Revision: 410741 $ $Date: 2006-05-31 21:35:48 -0700 (Wed, 31 May 2006) $ 079 * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a> 080 * @see Serializer 081 * @see DOMSerializer 082 */ 083 public abstract class BaseMarkupSerializer 084 { 085 086 private EncodingInfo encodingInfo; 087 088 /** 089 * Holds array of all element states that have been entered. 090 * The array is automatically resized. When leaving an element, 091 * it's state is not removed but reused when later returning 092 * to the same nesting level. 
/**
 * Holds array of all element states that have been entered.
 * The constructor pre-fills it with ten states. When leaving an
 * element, its state is not removed but reused when later returning
 * to the same nesting level.
 */
private ElementState[] elementStates;

/**
 * The index of the next state to place in the array,
 * or one plus the index of the current state. When zero,
 * we are in no state.
 */
private int elementStateCount;

/**
 * Vector holding the already formatted text of comments and PIs that
 * come before the root element (even after it), see
 * {@link #serializePreRoot}. Created lazily, so it may be null.
 */
private Vector preRoot;

/**
 * If the document has been started (header serialized), this
 * flag is set to true so it's not started twice.
 */
protected boolean started;

/**
 * True if the serializer has been prepared. This flag is set
 * to false when the serializer is reset prior to using it,
 * and to true after it has been prepared for usage.
 */
private boolean prepared;

/**
 * Association between namespace URIs (keys) and prefixes (values).
 * Accumulated here prior to starting an element and placing this
 * list in the element state.
 */
protected Hashtable prefixes;

/**
 * The public identifier of the document type, if known.
 */
protected String docTypePublicId;


/**
 * The system identifier of the document type, if known.
 */
protected String docTypeSystemId;


/**
 * The output format associated with this serializer. The format
 * object is never changed by the serializer.
 * NOTE(review): the constructor stores its argument without a null
 * check; only {@link #setOutputFormat} rejects null, so derived
 * classes are presumably expected to supply a default -- confirm.
 */
protected OutputFormat format;


/**
 * The printer used for printing text parts.
 */
protected Printer printer;


/**
 * True if indenting printer.
 */
protected boolean indenting;


/**
 * The underlying writer.
 */
private Writer writer;
170 */ 171 private OutputStream output; 172 173 174 //--------------------------------// 175 // Constructor and initialization // 176 //--------------------------------// 177 178 179 /** 180 * Protected constructor can only be used by derived class. 181 * Must initialize the serializer before serializing any document, 182 * see {@link #init}. 183 */ 184 protected BaseMarkupSerializer( OutputFormat format ) 185 { 186 int i; 187 188 elementStates = new ElementState[ 10 ]; 189 for ( i = 0 ; i < elementStates.length ; ++i ) 190 elementStates[ i ] = new ElementState(); 191 this.format = format; 192 } 193 194 195 public void setOutputByteStream( OutputStream output ) 196 { 197 if ( output == null ) 198 throw new NullPointerException( "SER001 Argument 'output' is null." ); 199 this.output = output; 200 writer = null; 201 reset(); 202 } 203 204 205 public void setOutputCharStream( Writer writer ) 206 { 207 if ( writer == null ) 208 throw new NullPointerException( "SER001 Argument 'writer' is null." ); 209 this.writer = writer; 210 output = null; 211 reset(); 212 } 213 214 215 public void setOutputFormat( OutputFormat format ) 216 { 217 if ( format == null ) 218 throw new NullPointerException( "SER001 Argument 'format' is null." ); 219 this.format = format; 220 reset(); 221 } 222 223 224 public boolean reset() 225 { 226 if ( elementStateCount > 1 ) 227 throw new IllegalStateException( "Serializer reset in the middle of serialization" ); 228 prepared = false; 229 return true; 230 } 231 232 233 protected void prepare() 234 throws IOException 235 { 236 if ( prepared ) 237 return; 238 239 if ( writer == null && output == null ) 240 throw new IOException( "SER002 No writer supplied for serializer" ); 241 // If the output stream has been set, use it to construct 242 // the writer. It is possible that the serializer has been 243 // reused with the same output stream and different encoding. 
244 245 encodingInfo = format.getEncodingInfo(); 246 247 if ( output != null ) { 248 writer = encodingInfo.getWriter(output); 249 } 250 251 if ( format.getIndenting() ) { 252 indenting = true; 253 printer = new IndentPrinter( writer, format ); 254 } else { 255 indenting = false; 256 printer = new Printer( writer, format ); 257 } 258 259 ElementState state; 260 261 elementStateCount = 0; 262 state = elementStates[ 0 ]; 263 state.namespaceURI = null; 264 state.localName = null; 265 state.rawName = null; 266 state.preserveSpace = format.getPreserveSpace(); 267 state.empty = true; 268 state.afterElement = false; 269 state.afterComment = false; 270 state.doCData = state.inCData = false; 271 state.prefixes = null; 272 273 docTypePublicId = format.getDoctypePublic(); 274 docTypeSystemId = format.getDoctypeSystem(); 275 started = false; 276 prepared = true; 277 } 278 279 280 281 //----------------------------------// 282 // DOM document serializing methods // 283 //----------------------------------// 284 285 286 /** 287 * Serializes the DOM element using the previously specified 288 * writer and output format. Throws an exception only if 289 * an I/O exception occured while serializing. 290 * 291 * @param elem The element to serialize 292 * @throws IOException An I/O exception occured while 293 * serializing 294 */ 295 public void serialize( Element elem ) 296 throws IOException 297 { 298 prepare(); 299 serializeNode( elem ); 300 printer.flush(); 301 if ( printer.getException() != null ) 302 throw printer.getException(); 303 } 304 305 306 /** 307 * Serializes the DOM document fragmnt using the previously specified 308 * writer and output format. Throws an exception only if 309 * an I/O exception occured while serializing. 
310 * 311 * @param elem The element to serialize 312 * @throws IOException An I/O exception occured while 313 * serializing 314 */ 315 public void serialize( DocumentFragment frag ) 316 throws IOException 317 { 318 prepare(); 319 serializeNode( frag ); 320 printer.flush(); 321 if ( printer.getException() != null ) 322 throw printer.getException(); 323 } 324 325 326 /** 327 * Serializes the DOM document using the previously specified 328 * writer and output format. Throws an exception only if 329 * an I/O exception occured while serializing. 330 * 331 * @param doc The document to serialize 332 * @throws IOException An I/O exception occured while 333 * serializing 334 */ 335 public void serialize( Document doc ) 336 throws IOException 337 { 338 prepare(); 339 serializeNode( doc ); 340 serializePreRoot(); 341 printer.flush(); 342 if ( printer.getException() != null ) 343 throw printer.getException(); 344 } 345 346 347 //------------------------------------------// 348 // SAX document handler serializing methods // 349 //------------------------------------------// 350 351 352 public void startDocument() 353 throws SAXException 354 { 355 try { 356 prepare(); 357 } catch ( IOException except ) { 358 throw new SAXException( except.toString() ); 359 } 360 // Nothing to do here. All the magic happens in startDocument(String) 361 } 362 363 364 public void characters( char[] chars, int start, int length ) 365 throws SAXException 366 { 367 ElementState state; 368 369 try { 370 state = content(); 371 372 // Check if text should be print as CDATA section or unescaped 373 // based on elements listed in the output format (the element 374 // state) or whether we are inside a CDATA section or entity. 375 376 if ( state.inCData || state.doCData ) { 377 int saveIndent; 378 379 // Print a CDATA section. The text is not escaped, but ']]>' 380 // appearing in the code must be identified and dealt with. 381 // The contents of a text node is considered space preserving. 382 if ( ! 
state.inCData ) { 383 printer.printText( "<![CDATA[" ); 384 state.inCData = true; 385 } 386 saveIndent = printer.getNextIndent(); 387 printer.setNextIndent( 0 ); 388 for ( int index = 0 ; index < length ; ++index ) { 389 if ( index + 2 < length && chars[ index ] == ']' && 390 chars[ index + 1 ] == ']' && chars[ index + 2 ] == '>' ) { 391 392 printText( chars, start, index + 2, true, true ); 393 printer.printText( "]]><![CDATA[" ); 394 start += index + 2; 395 length -= index + 2; 396 index = 0; 397 } 398 } 399 if ( length > 0 ) 400 printText( chars, start, length, true, true ); 401 printer.setNextIndent( saveIndent ); 402 403 } else { 404 405 int saveIndent; 406 407 if ( state.preserveSpace ) { 408 // If preserving space then hold of indentation so no 409 // excessive spaces are printed at line breaks, escape 410 // the text content without replacing spaces and print 411 // the text breaking only at line breaks. 412 saveIndent = printer.getNextIndent(); 413 printer.setNextIndent( 0 ); 414 printText( chars, start, length, true, state.unescaped ); 415 printer.setNextIndent( saveIndent ); 416 } else { 417 printText( chars, start, length, false, state.unescaped ); 418 } 419 } 420 } catch ( IOException except ) { 421 throw new SAXException( except ); 422 } 423 } 424 425 426 public void ignorableWhitespace( char[] chars, int start, int length ) 427 throws SAXException 428 { 429 int i; 430 431 try { 432 content(); 433 434 // Print ignorable whitespaces only when indenting, after 435 // all they are indentation. Cancel the indentation to 436 // not indent twice. 
437 if ( indenting ) { 438 printer.setThisIndent( 0 ); 439 for ( i = start ; length-- > 0 ; ++i ) 440 printer.printText( chars[ i ] ); 441 } 442 } catch ( IOException except ) { 443 throw new SAXException( except ); 444 } 445 } 446 447 448 public final void processingInstruction( String target, String code ) 449 throws SAXException 450 { 451 try { 452 processingInstructionIO( target, code ); 453 } catch ( IOException except ) { 454 throw new SAXException( except ); 455 } 456 } 457 458 public void processingInstructionIO( String target, String code ) 459 throws IOException 460 { 461 int index; 462 StringBuffer buffer; 463 ElementState state; 464 465 state = content(); 466 buffer = new StringBuffer( 40 ); 467 468 // Create the processing instruction textual representation. 469 // Make sure we don't have '?>' inside either target or code. 470 index = target.indexOf( "?>" ); 471 if ( index >= 0 ) 472 buffer.append( "<?" ).append( target.substring( 0, index ) ); 473 else 474 buffer.append( "<?" ).append( target ); 475 if ( code != null ) { 476 buffer.append( ' ' ); 477 index = code.indexOf( "?>" ); 478 if ( index >= 0 ) 479 buffer.append( code.substring( 0, index ) ); 480 else 481 buffer.append( code ); 482 } 483 buffer.append( "?>" ); 484 485 // If before the root element (or after it), do not print 486 // the PI directly but place it in the pre-root vector. 
487 if ( isDocumentState() ) { 488 if ( preRoot == null ) 489 preRoot = new Vector(); 490 preRoot.addElement( buffer.toString() ); 491 } else { 492 printer.indent(); 493 printText( buffer.toString(), true, true ); 494 printer.unindent(); 495 if ( indenting ) 496 state.afterElement = true; 497 } 498 } 499 500 501 public void comment( char[] chars, int start, int length ) 502 throws SAXException 503 { 504 try { 505 comment( new String( chars, start, length ) ); 506 } catch ( IOException except ) { 507 throw new SAXException( except ); 508 } 509 } 510 511 512 public void comment( String text ) 513 throws IOException 514 { 515 StringBuffer buffer; 516 int index; 517 ElementState state; 518 519 if ( format.getOmitComments() ) 520 return; 521 522 state = content(); 523 buffer = new StringBuffer( 40 ); 524 // Create the processing comment textual representation. 525 // Make sure we don't have '-->' inside the comment. 526 index = text.indexOf( "-->" ); 527 if ( index >= 0 ) 528 buffer.append( "<!--" ).append( text.substring( 0, index ) ).append( "-->" ); 529 else 530 buffer.append( "<!--" ).append( text ).append( "-->" ); 531 532 // If before the root element (or after it), do not print 533 // the comment directly but place it in the pre-root vector. 534 if ( isDocumentState() ) { 535 if ( preRoot == null ) 536 preRoot = new Vector(); 537 preRoot.addElement( buffer.toString() ); 538 } else { 539 // Indent this element on a new line if the first 540 // content of the parent element or immediately 541 // following an element. 542 if ( indenting && ! 
state.preserveSpace) 543 printer.breakLine(); 544 printer.indent(); 545 printText( buffer.toString(), true, true ); 546 printer.unindent(); 547 if ( indenting ) 548 state.afterElement = true; 549 } 550 state.afterComment = true; 551 state.afterElement = false; 552 } 553 554 555 public void startCDATA() 556 { 557 ElementState state; 558 559 state = getElementState(); 560 state.doCData = true; 561 } 562 563 564 public void endCDATA() 565 { 566 ElementState state; 567 568 state = getElementState(); 569 state.doCData = false; 570 } 571 572 573 public void startNonEscaping() 574 { 575 ElementState state; 576 577 state = getElementState(); 578 state.unescaped = true; 579 } 580 581 582 public void endNonEscaping() 583 { 584 ElementState state; 585 586 state = getElementState(); 587 state.unescaped = false; 588 } 589 590 591 public void startPreserving() 592 { 593 ElementState state; 594 595 state = getElementState(); 596 state.preserveSpace = true; 597 } 598 599 600 public void endPreserving() 601 { 602 ElementState state; 603 604 state = getElementState(); 605 state.preserveSpace = false; 606 } 607 608 609 /** 610 * Called at the end of the document to wrap it up. 611 * Will flush the output stream and throw an exception 612 * if any I/O error occured while serializing. 613 * 614 * @throws SAXException An I/O exception occured during 615 * serializing 616 */ 617 public void endDocument() 618 throws SAXException 619 { 620 try { 621 // Print all the elements accumulated outside of 622 // the root element. 623 serializePreRoot(); 624 // Flush the output, this is necessary for buffered output. 625 printer.flush(); 626 } catch ( IOException except ) { 627 throw new SAXException( except ); 628 } 629 } 630 631 632 public void startEntity( String name ) 633 { 634 // ??? 635 } 636 637 638 public void endEntity( String name ) 639 { 640 // ??? 
641 } 642 643 644 public void setDocumentLocator( Locator locator ) 645 { 646 // Nothing to do 647 } 648 649 650 //-----------------------------------------// 651 // SAX content handler serializing methods // 652 //-----------------------------------------// 653 654 655 public void skippedEntity ( String name ) 656 throws SAXException 657 { 658 try { 659 endCDATA(); 660 content(); 661 printer.printText( '&' ); 662 printer.printText( name ); 663 printer.printText( ';' ); 664 } catch ( IOException except ) { 665 throw new SAXException( except ); 666 } 667 } 668 669 670 public void startPrefixMapping( String prefix, String uri ) 671 throws SAXException 672 { 673 if ( prefixes == null ) 674 prefixes = new Hashtable(); 675 prefixes.put( uri, prefix == null ? "" : prefix ); 676 } 677 678 679 public void endPrefixMapping( String prefix ) 680 throws SAXException 681 { 682 } 683 684 685 //------------------------------------------// 686 // SAX DTD/Decl handler serializing methods // 687 //------------------------------------------// 688 689 690 public final void startDTD( String name, String publicId, String systemId ) 691 throws SAXException 692 { 693 try { 694 printer.enterDTD(); 695 docTypePublicId = publicId; 696 docTypeSystemId = systemId; 697 } catch ( IOException except ) { 698 throw new SAXException( except ); 699 } 700 } 701 702 703 public void endDTD() 704 { 705 // Nothing to do here, all the magic occurs in startDocument(String). 
706 } 707 708 709 public void elementDecl( String name, String model ) 710 throws SAXException 711 { 712 try { 713 printer.enterDTD(); 714 printer.printText( "<!ELEMENT " ); 715 printer.printText( name ); 716 printer.printText( ' ' ); 717 printer.printText( model ); 718 printer.printText( '>' ); 719 if ( indenting ) 720 printer.breakLine(); 721 } catch ( IOException except ) { 722 throw new SAXException( except ); 723 } 724 } 725 726 727 public void attributeDecl( String eName, String aName, String type, 728 String valueDefault, String value ) 729 throws SAXException 730 { 731 try { 732 printer.enterDTD(); 733 printer.printText( "<!ATTLIST " ); 734 printer.printText( eName ); 735 printer.printText( ' ' ); 736 printer.printText( aName ); 737 printer.printText( ' ' ); 738 printer.printText( type ); 739 if ( valueDefault != null ) { 740 printer.printText( ' ' ); 741 printer.printText( valueDefault ); 742 } 743 if ( value != null ) { 744 printer.printText( " \"" ); 745 printEscaped( value ); 746 printer.printText( '"' ); 747 } 748 printer.printText( '>' ); 749 if ( indenting ) 750 printer.breakLine(); 751 } catch ( IOException except ) { 752 throw new SAXException( except ); 753 } 754 } 755 756 757 public void internalEntityDecl( String name, String value ) 758 throws SAXException 759 { 760 try { 761 printer.enterDTD(); 762 printer.printText( "<!ENTITY " ); 763 printer.printText( name ); 764 printer.printText( " \"" ); 765 printEscaped( value ); 766 printer.printText( "\">" ); 767 if ( indenting ) 768 printer.breakLine(); 769 } catch ( IOException except ) { 770 throw new SAXException( except ); 771 } 772 } 773 774 775 public void externalEntityDecl( String name, String publicId, String systemId ) 776 throws SAXException 777 { 778 try { 779 printer.enterDTD(); 780 unparsedEntityDecl( name, publicId, systemId, null ); 781 } catch ( IOException except ) { 782 throw new SAXException( except ); 783 } 784 } 785 786 787 public void unparsedEntityDecl( String name, String 
publicId, 788 String systemId, String notationName ) 789 throws SAXException 790 { 791 try { 792 printer.enterDTD(); 793 if ( publicId == null ) { 794 printer.printText( "<!ENTITY " ); 795 printer.printText( name ); 796 printer.printText( " SYSTEM " ); 797 printDoctypeURL( systemId ); 798 } else { 799 printer.printText( "<!ENTITY " ); 800 printer.printText( name ); 801 printer.printText( " PUBLIC " ); 802 printDoctypeURL( publicId ); 803 printer.printText( ' ' ); 804 printDoctypeURL( systemId ); 805 } 806 if ( notationName != null ) { 807 printer.printText( " NDATA " ); 808 printer.printText( notationName ); 809 } 810 printer.printText( '>' ); 811 if ( indenting ) 812 printer.breakLine(); 813 } catch ( IOException except ) { 814 throw new SAXException( except ); 815 } 816 } 817 818 819 public void notationDecl( String name, String publicId, String systemId ) 820 throws SAXException 821 { 822 try { 823 printer.enterDTD(); 824 if ( publicId != null ) { 825 printer.printText( "<!NOTATION " ); 826 printer.printText( name ); 827 printer.printText( " PUBLIC " ); 828 printDoctypeURL( publicId ); 829 if ( systemId != null ) { 830 printer.printText( ' ' ); 831 printDoctypeURL( systemId ); 832 } 833 } else { 834 printer.printText( "<!NOTATION " ); 835 printer.printText( name ); 836 printer.printText( " SYSTEM " ); 837 printDoctypeURL( systemId ); 838 } 839 printer.printText( '>' ); 840 if ( indenting ) 841 printer.breakLine(); 842 } catch ( IOException except ) { 843 throw new SAXException( except ); 844 } 845 } 846 847 848 //------------------------------------------// 849 // Generic node serializing methods methods // 850 //------------------------------------------// 851 852 853 /** 854 * Serialize the DOM node. This method is shared across XML, HTML and XHTML 855 * serializers and the differences are masked out in a separate {@link 856 * #serializeElement}. 
//------------------------------------------//
// Generic node serializing methods         //
//------------------------------------------//


/**
 * Serializes the DOM node by dispatching on its node type. Text, CDATA,
 * comment and processing instruction nodes are routed to the matching
 * handler of this class, elements go to {@link #serializeElement}, and
 * documents, document fragments and entity references are serialized by
 * recursing into their children. Node types not listed are ignored.
 *
 * @param node The node to serialize
 * @see #serializeElement
 * @throws IOException An I/O exception occurred while
 *   serializing
 */
protected void serializeNode( Node node )
    throws IOException
{
    // Based on the node type call the suitable SAX handler.
    // Only comments entities and documents which are not
    // handled by SAX are serialized directly.
    switch ( node.getNodeType() ) {
    case Node.TEXT_NODE : {
        String text;

        text = node.getNodeValue();
        if ( text != null )
            // When indenting and not preserving space, text made up
            // only of whitespace is dropped.
            if ( !indenting || getElementState().preserveSpace
                 || (text.replace('\n',' ').trim().length() != 0))
                characters( text );
        break;
    }

    case Node.CDATA_SECTION_NODE : {
        String text;

        text = node.getNodeValue();
        if ( text != null ) {
            // Bracket the text so characters() prints it as CDATA.
            startCDATA();
            characters( text );
            endCDATA();
        }
        break;
    }

    case Node.COMMENT_NODE : {
        String text;

        if ( ! format.getOmitComments() ) {
            text = node.getNodeValue();
            if ( text != null )
                comment( text );
        }
        break;
    }

    case Node.ENTITY_REFERENCE_NODE : {
        Node child;

        // The reference itself is not printed; its children are
        // serialized in its place.
        endCDATA();
        content();
        child = node.getFirstChild();
        while ( child != null ) {
            serializeNode( child );
            child = child.getNextSibling();
        }
        break;
    }

    case Node.PROCESSING_INSTRUCTION_NODE :
        processingInstructionIO( node.getNodeName(), node.getNodeValue() );
        break;

    case Node.ELEMENT_NODE :
        serializeElement( (Element) node );
        break;

    case Node.DOCUMENT_NODE : {
        DocumentType docType;

        // If there is a document type, use the SAX events to
        // serialize it.
        docType = ( (Document) node ).getDoctype();
        if (docType != null) {
            // DOM Level 2 (or higher)
            // TODO: result of the following call was assigned to a local variable that was never
            // read. Can the call be deleted?
            ( (Document) node ).getImplementation();
            try {
                String internal;

                printer.enterDTD();
                docTypePublicId = docType.getPublicId();
                docTypeSystemId = docType.getSystemId();
                internal = docType.getInternalSubset();
                if ( internal != null && internal.length() > 0 )
                    printer.printText( internal );
                endDTD();
            }
            // DOM Level 1 -- does implementation have methods?
            catch (NoSuchMethodError nsme) {
                Class docTypeClass = docType.getClass();

                // These locals shadow the fields of the same name; the
                // fields are assigned explicitly through 'this' below.
                String docTypePublicId = null;
                String docTypeSystemId = null;
                try {
                    java.lang.reflect.Method getPublicId = docTypeClass.getMethod("getPublicId", null);
                    if (getPublicId.getReturnType().equals(String.class)) {
                        docTypePublicId = (String)getPublicId.invoke(docType, null);
                    }
                }
                catch (Exception e) {
                    // Best effort: the public identifier stays null.
                }
                try {
                    java.lang.reflect.Method getSystemId = docTypeClass.getMethod("getSystemId", null);
                    if (getSystemId.getReturnType().equals(String.class)) {
                        docTypeSystemId = (String)getSystemId.invoke(docType, null);
                    }
                }
                catch (Exception e) {
                    // Best effort: the system identifier stays null.
                }
                this.printer.enterDTD();
                this.docTypePublicId = docTypePublicId;
                this.docTypeSystemId = docTypeSystemId;
                endDTD();
            }
        }
        // !! Fall through: the document's children are serialized by
        // the document fragment case below.
    }
    case Node.DOCUMENT_FRAGMENT_NODE : {
        Node child;

        // Reached for documents (via the fall through above) and for
        // document fragments. Just serialize the contents.
        child = node.getFirstChild();
        while ( child != null ) {
            serializeNode( child );
            child = child.getNextSibling();
        }
        break;
    }

    default:
        break;
    }
}
Returns the current element 1003 * state with <tt>empty</tt> and <tt>afterElement</tt> set to false. 1004 * 1005 * @return The current element state 1006 * @throws IOException An I/O exception occured while 1007 * serializing 1008 */ 1009 protected ElementState content() 1010 throws IOException 1011 { 1012 ElementState state; 1013 1014 state = getElementState(); 1015 if ( ! isDocumentState() ) { 1016 // Need to close CData section first 1017 if ( state.inCData && ! state.doCData ) { 1018 printer.printText( "]]>" ); 1019 state.inCData = false; 1020 } 1021 // If this is the first content in the element, 1022 // change the state to not-empty and close the 1023 // opening element tag. 1024 if ( state.empty ) { 1025 printer.printText( '>' ); 1026 state.empty = false; 1027 } 1028 // Except for one content type, all of them 1029 // are not last element. That one content 1030 // type will take care of itself. 1031 state.afterElement = false; 1032 // Except for one content type, all of them 1033 // are not last comment. That one content 1034 // type will take care of itself. 1035 state.afterComment = false; 1036 } 1037 return state; 1038 } 1039 1040 1041 /** 1042 * Called to print the text contents in the prevailing element format. 1043 * Since this method is capable of printing text as CDATA, it is used 1044 * for that purpose as well. White space handling is determined by the 1045 * current element state. In addition, the output format can dictate 1046 * whether the text is printed as CDATA or unescaped. 
1047 * 1048 * @param text The text to print 1049 * @param unescaped True is should print unescaped 1050 * @throws IOException An I/O exception occured while 1051 * serializing 1052 */ 1053 protected void characters( String text ) 1054 throws IOException 1055 { 1056 ElementState state; 1057 1058 state = content(); 1059 // Check if text should be print as CDATA section or unescaped 1060 // based on elements listed in the output format (the element 1061 // state) or whether we are inside a CDATA section or entity. 1062 1063 if ( state.inCData || state.doCData ) { 1064 StringBuffer buffer; 1065 int index; 1066 int saveIndent; 1067 1068 // Print a CDATA section. The text is not escaped, but ']]>' 1069 // appearing in the code must be identified and dealt with. 1070 // The contents of a text node is considered space preserving. 1071 buffer = new StringBuffer( text.length() ); 1072 if ( ! state.inCData ) { 1073 buffer.append( "<![CDATA[" ); 1074 state.inCData = true; 1075 } 1076 index = text.indexOf( "]]>" ); 1077 while ( index >= 0 ) { 1078 buffer.append( text.substring( 0, index + 2 ) ).append( "]]><![CDATA[" ); 1079 text = text.substring( index + 2 ); 1080 index = text.indexOf( "]]>" ); 1081 } 1082 buffer.append( text ); 1083 saveIndent = printer.getNextIndent(); 1084 printer.setNextIndent( 0 ); 1085 printText( buffer.toString(), true, true ); 1086 printer.setNextIndent( saveIndent ); 1087 1088 } else { 1089 1090 int saveIndent; 1091 1092 if ( state.preserveSpace ) { 1093 // If preserving space then hold of indentation so no 1094 // excessive spaces are printed at line breaks, escape 1095 // the text content without replacing spaces and print 1096 // the text breaking only at line breaks. 
1097 saveIndent = printer.getNextIndent(); 1098 printer.setNextIndent( 0 ); 1099 printText( text, true, state.unescaped ); 1100 printer.setNextIndent( saveIndent ); 1101 } else { 1102 printText( text, false, state.unescaped ); 1103 } 1104 } 1105 } 1106 1107 1108 /** 1109 * Returns the suitable entity reference for this character value, 1110 * or null if no such entity exists. Calling this method with <tt>'&'</tt> 1111 * will return <tt>"&amp;"</tt>. 1112 * 1113 * @param ch Character value 1114 * @return Character entity name, or null 1115 */ 1116 protected abstract String getEntityRef( int ch ); 1117 1118 1119 /** 1120 * Called to serializee the DOM element. The element is serialized based on 1121 * the serializer's method (XML, HTML, XHTML). 1122 * 1123 * @param elem The element to serialize 1124 * @throws IOException An I/O exception occured while 1125 * serializing 1126 */ 1127 protected abstract void serializeElement( Element elem ) 1128 throws IOException; 1129 1130 1131 /** 1132 * Comments and PIs cannot be serialized before the root element, 1133 * because the root element serializes the document type, which 1134 * generally comes first. Instead such PIs and comments are 1135 * accumulated inside a vector and serialized by calling this 1136 * method. Will be called when the root element is serialized 1137 * and when the document finished serializing. 
1138 * 1139 * @throws IOException An I/O exception occured while 1140 * serializing 1141 */ 1142 protected void serializePreRoot() 1143 throws IOException 1144 { 1145 int i; 1146 1147 if ( preRoot != null ) { 1148 for ( i = 0 ; i < preRoot.size() ; ++i ) { 1149 printText( (String) preRoot.elementAt( i ), true, true ); 1150 if ( indenting ) 1151 printer.breakLine(); 1152 } 1153 preRoot.removeAllElements(); 1154 } 1155 } 1156 1157 1158 //---------------------------------------------// 1159 // Text pretty printing and formatting methods // 1160 //---------------------------------------------// 1161 1162 1163 /** 1164 * Called to print additional text with whitespace handling. 1165 * If spaces are preserved, the text is printed as if by calling 1166 * {@link #printText(String)} with a call to {@link #breakLine} 1167 * for each new line. If spaces are not preserved, the text is 1168 * broken at space boundaries if longer than the line width; 1169 * Multiple spaces are printed as such, but spaces at beginning 1170 * of line are removed. 1171 * 1172 * @param text The text to print 1173 * @param preserveSpace Space preserving flag 1174 * @param unescaped Print unescaped 1175 */ 1176 protected final void printText( char[] chars, int start, int length, 1177 boolean preserveSpace, boolean unescaped ) 1178 throws IOException 1179 { 1180 char ch; 1181 1182 if ( preserveSpace ) { 1183 // Preserving spaces: the text must print exactly as it is, 1184 // without breaking when spaces appear in the text and without 1185 // consolidating spaces. If a line terminator is used, a line 1186 // break will occur. 1187 while ( length-- > 0 ) { 1188 ch = chars[ start ]; 1189 ++start; 1190 if ( ch == '\n' || ch == '\r' || unescaped ) 1191 printer.printText( ch ); 1192 else 1193 printEscaped( ch ); 1194 } 1195 } else { 1196 // Not preserving spaces: print one part at a time, and 1197 // use spaces between parts to break them into different 1198 // lines. 
Spaces at beginning of line will be stripped 1199 // by printing mechanism. Line terminator is treated 1200 // no different than other text part. 1201 while ( length-- > 0 ) { 1202 ch = chars[ start ]; 1203 ++start; 1204 if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' ) 1205 printer.printSpace(); 1206 else if ( unescaped ) 1207 printer.printText( ch ); 1208 else 1209 printEscaped( ch ); 1210 } 1211 } 1212 } 1213 1214 1215 protected final void printText( String text, boolean preserveSpace, boolean unescaped ) 1216 throws IOException 1217 { 1218 int index; 1219 char ch; 1220 1221 if ( preserveSpace ) { 1222 // Preserving spaces: the text must print exactly as it is, 1223 // without breaking when spaces appear in the text and without 1224 // consolidating spaces. If a line terminator is used, a line 1225 // break will occur. 1226 for ( index = 0 ; index < text.length() ; ++index ) { 1227 ch = text.charAt( index ); 1228 if ( ch == '\n' || ch == '\r' || unescaped ) 1229 printer.printText( ch ); 1230 else 1231 printEscaped( ch ); 1232 } 1233 } else { 1234 // Not preserving spaces: print one part at a time, and 1235 // use spaces between parts to break them into different 1236 // lines. Spaces at beginning of line will be stripped 1237 // by printing mechanism. Line terminator is treated 1238 // no different than other text part. 1239 for ( index = 0 ; index < text.length() ; ++index ) { 1240 ch = text.charAt( index ); 1241 if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' ) 1242 printer.printSpace(); 1243 else if ( unescaped ) 1244 printer.printText( ch ); 1245 else 1246 printEscaped( ch ); 1247 } 1248 } 1249 } 1250 1251 1252 /** 1253 * Print a document type public or system identifier URL. 1254 * Encapsulates the URL in double quotes, escapes non-printing 1255 * characters and print it equivalent to {@link #printText}. 
1256 * 1257 * @param url The document type url to print 1258 */ 1259 protected void printDoctypeURL( String url ) 1260 throws IOException 1261 { 1262 int i; 1263 1264 printer.printText( '"' ); 1265 for( i = 0 ; i < url.length() ; ++i ) { 1266 if ( url.charAt( i ) == '"' || url.charAt( i ) < 0x20 || url.charAt( i ) > 0x7F ) { 1267 printer.printText( '%' ); 1268 printer.printText( Integer.toHexString( url.charAt( i ) ) ); 1269 } else 1270 printer.printText( url.charAt( i ) ); 1271 } 1272 printer.printText( '"' ); 1273 } 1274 1275 1276 protected void printEscaped( int ch ) 1277 throws IOException 1278 { 1279 String charRef; 1280 1281 // If there is a suitable entity reference for this 1282 // character, print it. The list of available entity 1283 // references is almost but not identical between 1284 // XML and HTML. 1285 charRef = getEntityRef( ch ); 1286 if ( charRef != null ) { 1287 printer.printText( '&' ); 1288 printer.printText( charRef ); 1289 printer.printText( ';' ); 1290 } else if ( ( ch >= ' ' && encodingInfo.isPrintable(ch) && ch != 0xF7 ) || 1291 ch == '\n' || ch == '\r' || ch == '\t' ) { 1292 // If the character is not printable, print as character reference. 1293 // Non printables are below ASCII space but not tab or line 1294 // terminator, ASCII delete, or above a certain Unicode threshold. 1295 if (ch < 0x10000) { 1296 printer.printText((char)ch ); 1297 } else { 1298 printer.printText((char)(((ch-0x10000)>>10)+0xd800)); 1299 printer.printText((char)(((ch-0x10000)&0x3ff)+0xdc00)); 1300 } 1301 1302 } else { 1303 printer.printText( "&#x" ); 1304 printer.printText(Integer.toHexString(ch)); 1305 printer.printText( ';' ); 1306 } 1307 } 1308 1309 1310 /** 1311 * Escapes a string so it may be printed as text content or attribute 1312 * value. Non printable characters are escaped using character references. 1313 * Where the format specifies a deault entity reference, that reference 1314 * is used (e.g. <tt>&lt;</tt>). 
1315 * 1316 * @param source The string to escape 1317 */ 1318 protected void printEscaped( String source ) 1319 throws IOException 1320 { 1321 for ( int i = 0 ; i < source.length() ; ++i ) { 1322 int ch = source.charAt(i); 1323 if ((ch & 0xfc00) == 0xd800 && i+1 < source.length()) { 1324 int lowch = source.charAt(i+1); 1325 if ((lowch & 0xfc00) == 0xdc00) { 1326 ch = 0x10000 + ((ch-0xd800)<<10) + lowch-0xdc00; 1327 i++; 1328 } 1329 } 1330 printEscaped(ch); 1331 } 1332 } 1333 1334 1335 //--------------------------------// 1336 // Element state handling methods // 1337 //--------------------------------// 1338 1339 1340 /** 1341 * Return the state of the current element. 1342 * 1343 * @return Current element state 1344 */ 1345 protected ElementState getElementState() 1346 { 1347 return elementStates[ elementStateCount ]; 1348 } 1349 1350 1351 /** 1352 * Enter a new element state for the specified element. 1353 * Tag name and space preserving is specified, element 1354 * state is initially empty. 1355 * 1356 * @return Current element state, or null 1357 */ 1358 protected ElementState enterElementState( String namespaceURI, String localName, 1359 String rawName, boolean preserveSpace ) 1360 { 1361 ElementState state; 1362 1363 if ( elementStateCount + 1 == elementStates.length ) { 1364 ElementState[] newStates; 1365 1366 // Need to create a larger array of states. This does not happen 1367 // often, unless the document is really deep. 
1368 newStates = new ElementState[ elementStates.length + 10 ]; 1369 for ( int i = 0 ; i < elementStates.length ; ++i ) 1370 newStates[ i ] = elementStates[ i ]; 1371 for ( int i = elementStates.length ; i < newStates.length ; ++i ) 1372 newStates[ i ] = new ElementState(); 1373 elementStates = newStates; 1374 } 1375 1376 ++elementStateCount; 1377 state = elementStates[ elementStateCount ]; 1378 state.namespaceURI = namespaceURI; 1379 state.localName = localName; 1380 state.rawName = rawName; 1381 state.preserveSpace = preserveSpace; 1382 state.empty = true; 1383 state.afterElement = false; 1384 state.afterComment = false; 1385 state.doCData = state.inCData = false; 1386 state.unescaped = false; 1387 state.prefixes = prefixes; 1388 1389 prefixes = null; 1390 return state; 1391 } 1392 1393 1394 /** 1395 * Leave the current element state and return to the 1396 * state of the parent element. If this was the root 1397 * element, return to the state of the document. 1398 * 1399 * @return Previous element state 1400 */ 1401 protected ElementState leaveElementState() 1402 { 1403 if ( elementStateCount > 0 ) { 1404 /*Corrected by David Blondeau (blondeau@intalio.com)*/ 1405 prefixes = null; 1406 //_prefixes = _elementStates[ _elementStateCount ].prefixes; 1407 -- elementStateCount; 1408 return elementStates[ elementStateCount ]; 1409 } else 1410 throw new IllegalStateException( "Internal error: element state is zero" ); 1411 } 1412 1413 1414 /** 1415 * Returns true if in the state of the document. 1416 * Returns true before entering any element and after 1417 * leaving the root element. 1418 * 1419 * @return True if in the state of the document 1420 */ 1421 protected boolean isDocumentState() 1422 { 1423 return elementStateCount == 0; 1424 } 1425 1426 1427 /** 1428 * Returns the namespace prefix for the specified URI. 1429 * If the URI has been mapped to a prefix, returns the 1430 * prefix, otherwise returns null. 
*
     * @param namespaceURI The namespace URI
     * @return The namespace prefix if known, or null
     */
    protected String getPrefix( String namespaceURI )
    {
        String found;

        // Mappings not yet attached to an element are consulted first.
        if ( prefixes != null ) {
            found = (String) prefixes.get( namespaceURI );
            if ( found != null )
                return found;
        }

        // Then the entered elements, innermost first. Level zero is
        // never consulted, so nothing is searched while no element
        // has been entered.
        for ( int depth = elementStateCount ; depth > 0 ; --depth ) {
            if ( elementStates[ depth ].prefixes == null )
                continue;
            found = (String) elementStates[ depth ].prefixes.get( namespaceURI );
            if ( found != null )
                return found;
        }
        return null;
    }


}