1 /**
2 *
3 * Copyright 2006 The Apache Software Foundation
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18
19
20
21
22
23 package org.apache.geronimo.system.configuration;
24
25 import java.io.Writer;
26 import java.io.OutputStream;
27 import java.io.IOException;
28 import java.util.Vector;
29 import java.util.Hashtable;
30
31 import org.w3c.dom.Document;
32 import org.w3c.dom.DocumentFragment;
33 import org.w3c.dom.DocumentType;
34 import org.w3c.dom.Element;
35 import org.w3c.dom.Node;
36 import org.xml.sax.Locator;
37 import org.xml.sax.SAXException;
38
39 /**
 * Base class for a serializer supporting both DOM and SAX pretty
 * serializing of XML/HTML/XHTML documents. Derived classes perform
 * the method-specific serializing, while this class provides the common
 * serializing mechanisms.
44 * <p>
45 * The serializer must be initialized with the proper writer and
46 * output format before it can be used by calling {@link #init}.
47 * The serializer can be reused any number of times, but cannot
48 * be used concurrently by two threads.
49 * <p>
50 * If an output stream is used, the encoding is taken from the
51 * output format (defaults to <tt>UTF-8</tt>). If a writer is
52 * used, make sure the writer uses the same encoding (if applies)
53 * as specified in the output format.
54 * <p>
55 * The serializer supports both DOM and SAX. DOM serializing is done
56 * by calling {@link #serialize} and SAX serializing is done by firing
57 * SAX events and using the serializer as a document handler.
58 * This also applies to derived class.
59 * <p>
 * If an I/O exception occurs while serializing, the serializer
 * will not throw an exception directly, but only throw it
 * at the end of serializing (either DOM or SAX's {@link
 * org.xml.sax.DocumentHandler#endDocument}).
64 * <p>
65 * For elements that are not specified as whitespace preserving,
66 * the serializer will potentially break long text lines at space
67 * boundaries, indent lines, and serialize elements on separate
68 * lines. Line terminators will be regarded as spaces, and
69 * spaces at beginning of line will be stripped.
70 * <p>
71 * When indenting, the serializer is capable of detecting seemingly
72 * element content, and serializing these elements indented on separate
73 * lines. An element is serialized indented when it is the first or
74 * last child of an element, or immediate following or preceding
75 * another element.
76 *
77 *
78 * @version $Revision: 410741 $ $Date: 2006-05-31 21:35:48 -0700 (Wed, 31 May 2006) $
79 * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
80 * @see Serializer
81 * @see DOMSerializer
82 */
83 public abstract class BaseMarkupSerializer
84 {
85
86 private EncodingInfo encodingInfo;
87
88 /**
89 * Holds array of all element states that have been entered.
90 * The array is automatically resized. When leaving an element,
 * its state is not removed but reused when later returning
92 * to the same nesting level.
93 */
94 private ElementState[] elementStates;
95
96 /**
97 * The index of the next state to place in the array,
98 * or one plus the index of the current state. When zero,
99 * we are in no state.
100 */
101 private int elementStateCount;
102
103 /**
104 * Vector holding comments and PIs that come before the root
105 * element (even after it), see {@link #serializePreRoot}.
106 */
107 private Vector preRoot;
108
109 /**
110 * If the document has been started (header serialized), this
111 * flag is set to true so it's not started twice.
112 */
113 protected boolean started;
114
115 /**
116 * True if the serializer has been prepared. This flag is set
117 * to false when the serializer is reset prior to using it,
118 * and to true after it has been prepared for usage.
119 */
120 private boolean prepared;
121
122 /**
123 * Association between namespace URIs (keys) and prefixes (values).
124 * Accumulated here prior to starting an element and placing this
125 * list in the element state.
126 */
127 protected Hashtable prefixes;
128
129 /**
 * The public identifier of the document type, if known.
131 */
132 protected String docTypePublicId;
133
134
135 /**
136 * The system identifier of the document type, if known.
137 */
138 protected String docTypeSystemId;
139
140
141 /**
142 * The output format associated with this serializer. This will never
143 * be a null reference. If no format was passed to the constructor,
144 * the default one for this document type will be used. The format
145 * object is never changed by the serializer.
146 */
147 protected OutputFormat format;
148
149
150 /**
151 * The printer used for printing text parts.
152 */
153 protected Printer printer;
154
155
156 /**
157 * True if indenting printer.
158 */
159 protected boolean indenting;
160
161
162 /**
163 * The underlying writer.
164 */
165 private Writer writer;
166
167
168 /**
169 * The output stream.
170 */
171 private OutputStream output;
172
173
174
175
176
177
178
179 /**
180 * Protected constructor can only be used by derived class.
181 * Must initialize the serializer before serializing any document,
182 * see {@link #init}.
183 */
184 protected BaseMarkupSerializer( OutputFormat format )
185 {
186 int i;
187
188 elementStates = new ElementState[ 10 ];
189 for ( i = 0 ; i < elementStates.length ; ++i )
190 elementStates[ i ] = new ElementState();
191 this.format = format;
192 }
193
194
195 public void setOutputByteStream( OutputStream output )
196 {
197 if ( output == null )
198 throw new NullPointerException( "SER001 Argument 'output' is null." );
199 this.output = output;
200 writer = null;
201 reset();
202 }
203
204
205 public void setOutputCharStream( Writer writer )
206 {
207 if ( writer == null )
208 throw new NullPointerException( "SER001 Argument 'writer' is null." );
209 this.writer = writer;
210 output = null;
211 reset();
212 }
213
214
215 public void setOutputFormat( OutputFormat format )
216 {
217 if ( format == null )
218 throw new NullPointerException( "SER001 Argument 'format' is null." );
219 this.format = format;
220 reset();
221 }
222
223
224 public boolean reset()
225 {
226 if ( elementStateCount > 1 )
227 throw new IllegalStateException( "Serializer reset in the middle of serialization" );
228 prepared = false;
229 return true;
230 }
231
232
233 protected void prepare()
234 throws IOException
235 {
236 if ( prepared )
237 return;
238
239 if ( writer == null && output == null )
240 throw new IOException( "SER002 No writer supplied for serializer" );
241
242
243
244
245 encodingInfo = format.getEncodingInfo();
246
247 if ( output != null ) {
248 writer = encodingInfo.getWriter(output);
249 }
250
251 if ( format.getIndenting() ) {
252 indenting = true;
253 printer = new IndentPrinter( writer, format );
254 } else {
255 indenting = false;
256 printer = new Printer( writer, format );
257 }
258
259 ElementState state;
260
261 elementStateCount = 0;
262 state = elementStates[ 0 ];
263 state.namespaceURI = null;
264 state.localName = null;
265 state.rawName = null;
266 state.preserveSpace = format.getPreserveSpace();
267 state.empty = true;
268 state.afterElement = false;
269 state.afterComment = false;
270 state.doCData = state.inCData = false;
271 state.prefixes = null;
272
273 docTypePublicId = format.getDoctypePublic();
274 docTypeSystemId = format.getDoctypeSystem();
275 started = false;
276 prepared = true;
277 }
278
279
280
281
282
283
284
285
286 /**
287 * Serializes the DOM element using the previously specified
288 * writer and output format. Throws an exception only if
289 * an I/O exception occured while serializing.
290 *
291 * @param elem The element to serialize
292 * @throws IOException An I/O exception occured while
293 * serializing
294 */
295 public void serialize( Element elem )
296 throws IOException
297 {
298 prepare();
299 serializeNode( elem );
300 printer.flush();
301 if ( printer.getException() != null )
302 throw printer.getException();
303 }
304
305
306 /**
307 * Serializes the DOM document fragmnt using the previously specified
308 * writer and output format. Throws an exception only if
309 * an I/O exception occured while serializing.
310 *
311 * @param elem The element to serialize
312 * @throws IOException An I/O exception occured while
313 * serializing
314 */
315 public void serialize( DocumentFragment frag )
316 throws IOException
317 {
318 prepare();
319 serializeNode( frag );
320 printer.flush();
321 if ( printer.getException() != null )
322 throw printer.getException();
323 }
324
325
326 /**
327 * Serializes the DOM document using the previously specified
328 * writer and output format. Throws an exception only if
329 * an I/O exception occured while serializing.
330 *
331 * @param doc The document to serialize
332 * @throws IOException An I/O exception occured while
333 * serializing
334 */
335 public void serialize( Document doc )
336 throws IOException
337 {
338 prepare();
339 serializeNode( doc );
340 serializePreRoot();
341 printer.flush();
342 if ( printer.getException() != null )
343 throw printer.getException();
344 }
345
346
347
348
349
350
351
352 public void startDocument()
353 throws SAXException
354 {
355 try {
356 prepare();
357 } catch ( IOException except ) {
358 throw new SAXException( except.toString() );
359 }
360
361 }
362
363
364 public void characters( char[] chars, int start, int length )
365 throws SAXException
366 {
367 ElementState state;
368
369 try {
370 state = content();
371
372
373
374
375
376 if ( state.inCData || state.doCData ) {
377 int saveIndent;
378
379
380
381
382 if ( ! state.inCData ) {
383 printer.printText( "<![CDATA[" );
384 state.inCData = true;
385 }
386 saveIndent = printer.getNextIndent();
387 printer.setNextIndent( 0 );
388 for ( int index = 0 ; index < length ; ++index ) {
389 if ( index + 2 < length && chars[ index ] == ']' &&
390 chars[ index + 1 ] == ']' && chars[ index + 2 ] == '>' ) {
391
392 printText( chars, start, index + 2, true, true );
393 printer.printText( "]]><![CDATA[" );
394 start += index + 2;
395 length -= index + 2;
396 index = 0;
397 }
398 }
399 if ( length > 0 )
400 printText( chars, start, length, true, true );
401 printer.setNextIndent( saveIndent );
402
403 } else {
404
405 int saveIndent;
406
407 if ( state.preserveSpace ) {
408
409
410
411
412 saveIndent = printer.getNextIndent();
413 printer.setNextIndent( 0 );
414 printText( chars, start, length, true, state.unescaped );
415 printer.setNextIndent( saveIndent );
416 } else {
417 printText( chars, start, length, false, state.unescaped );
418 }
419 }
420 } catch ( IOException except ) {
421 throw new SAXException( except );
422 }
423 }
424
425
426 public void ignorableWhitespace( char[] chars, int start, int length )
427 throws SAXException
428 {
429 int i;
430
431 try {
432 content();
433
434
435
436
437 if ( indenting ) {
438 printer.setThisIndent( 0 );
439 for ( i = start ; length-- > 0 ; ++i )
440 printer.printText( chars[ i ] );
441 }
442 } catch ( IOException except ) {
443 throw new SAXException( except );
444 }
445 }
446
447
448 public final void processingInstruction( String target, String code )
449 throws SAXException
450 {
451 try {
452 processingInstructionIO( target, code );
453 } catch ( IOException except ) {
454 throw new SAXException( except );
455 }
456 }
457
458 public void processingInstructionIO( String target, String code )
459 throws IOException
460 {
461 int index;
462 StringBuffer buffer;
463 ElementState state;
464
465 state = content();
466 buffer = new StringBuffer( 40 );
467
468
469
470 index = target.indexOf( "?>" );
471 if ( index >= 0 )
472 buffer.append( "<?" ).append( target.substring( 0, index ) );
473 else
474 buffer.append( "<?" ).append( target );
475 if ( code != null ) {
476 buffer.append( ' ' );
477 index = code.indexOf( "?>" );
478 if ( index >= 0 )
479 buffer.append( code.substring( 0, index ) );
480 else
481 buffer.append( code );
482 }
483 buffer.append( "?>" );
484
485
486
487 if ( isDocumentState() ) {
488 if ( preRoot == null )
489 preRoot = new Vector();
490 preRoot.addElement( buffer.toString() );
491 } else {
492 printer.indent();
493 printText( buffer.toString(), true, true );
494 printer.unindent();
495 if ( indenting )
496 state.afterElement = true;
497 }
498 }
499
500
501 public void comment( char[] chars, int start, int length )
502 throws SAXException
503 {
504 try {
505 comment( new String( chars, start, length ) );
506 } catch ( IOException except ) {
507 throw new SAXException( except );
508 }
509 }
510
511
512 public void comment( String text )
513 throws IOException
514 {
515 StringBuffer buffer;
516 int index;
517 ElementState state;
518
519 if ( format.getOmitComments() )
520 return;
521
522 state = content();
523 buffer = new StringBuffer( 40 );
524
525
526 index = text.indexOf( "-->" );
527 if ( index >= 0 )
528 buffer.append( "<!--" ).append( text.substring( 0, index ) ).append( "-->" );
529 else
530 buffer.append( "<!--" ).append( text ).append( "-->" );
531
532
533
534 if ( isDocumentState() ) {
535 if ( preRoot == null )
536 preRoot = new Vector();
537 preRoot.addElement( buffer.toString() );
538 } else {
539
540
541
542 if ( indenting && ! state.preserveSpace)
543 printer.breakLine();
544 printer.indent();
545 printText( buffer.toString(), true, true );
546 printer.unindent();
547 if ( indenting )
548 state.afterElement = true;
549 }
550 state.afterComment = true;
551 state.afterElement = false;
552 }
553
554
555 public void startCDATA()
556 {
557 ElementState state;
558
559 state = getElementState();
560 state.doCData = true;
561 }
562
563
564 public void endCDATA()
565 {
566 ElementState state;
567
568 state = getElementState();
569 state.doCData = false;
570 }
571
572
573 public void startNonEscaping()
574 {
575 ElementState state;
576
577 state = getElementState();
578 state.unescaped = true;
579 }
580
581
582 public void endNonEscaping()
583 {
584 ElementState state;
585
586 state = getElementState();
587 state.unescaped = false;
588 }
589
590
591 public void startPreserving()
592 {
593 ElementState state;
594
595 state = getElementState();
596 state.preserveSpace = true;
597 }
598
599
600 public void endPreserving()
601 {
602 ElementState state;
603
604 state = getElementState();
605 state.preserveSpace = false;
606 }
607
608
609 /**
610 * Called at the end of the document to wrap it up.
611 * Will flush the output stream and throw an exception
612 * if any I/O error occured while serializing.
613 *
614 * @throws SAXException An I/O exception occured during
615 * serializing
616 */
617 public void endDocument()
618 throws SAXException
619 {
620 try {
621
622
623 serializePreRoot();
624
625 printer.flush();
626 } catch ( IOException except ) {
627 throw new SAXException( except );
628 }
629 }
630
631
632 public void startEntity( String name )
633 {
634
635 }
636
637
638 public void endEntity( String name )
639 {
640
641 }
642
643
644 public void setDocumentLocator( Locator locator )
645 {
646
647 }
648
649
650
651
652
653
654
655 public void skippedEntity ( String name )
656 throws SAXException
657 {
658 try {
659 endCDATA();
660 content();
661 printer.printText( '&' );
662 printer.printText( name );
663 printer.printText( ';' );
664 } catch ( IOException except ) {
665 throw new SAXException( except );
666 }
667 }
668
669
670 public void startPrefixMapping( String prefix, String uri )
671 throws SAXException
672 {
673 if ( prefixes == null )
674 prefixes = new Hashtable();
675 prefixes.put( uri, prefix == null ? "" : prefix );
676 }
677
678
679 public void endPrefixMapping( String prefix )
680 throws SAXException
681 {
682 }
683
684
685
686
687
688
689
690 public final void startDTD( String name, String publicId, String systemId )
691 throws SAXException
692 {
693 try {
694 printer.enterDTD();
695 docTypePublicId = publicId;
696 docTypeSystemId = systemId;
697 } catch ( IOException except ) {
698 throw new SAXException( except );
699 }
700 }
701
702
703 public void endDTD()
704 {
705
706 }
707
708
709 public void elementDecl( String name, String model )
710 throws SAXException
711 {
712 try {
713 printer.enterDTD();
714 printer.printText( "<!ELEMENT " );
715 printer.printText( name );
716 printer.printText( ' ' );
717 printer.printText( model );
718 printer.printText( '>' );
719 if ( indenting )
720 printer.breakLine();
721 } catch ( IOException except ) {
722 throw new SAXException( except );
723 }
724 }
725
726
727 public void attributeDecl( String eName, String aName, String type,
728 String valueDefault, String value )
729 throws SAXException
730 {
731 try {
732 printer.enterDTD();
733 printer.printText( "<!ATTLIST " );
734 printer.printText( eName );
735 printer.printText( ' ' );
736 printer.printText( aName );
737 printer.printText( ' ' );
738 printer.printText( type );
739 if ( valueDefault != null ) {
740 printer.printText( ' ' );
741 printer.printText( valueDefault );
742 }
743 if ( value != null ) {
744 printer.printText( " \"" );
745 printEscaped( value );
746 printer.printText( '"' );
747 }
748 printer.printText( '>' );
749 if ( indenting )
750 printer.breakLine();
751 } catch ( IOException except ) {
752 throw new SAXException( except );
753 }
754 }
755
756
757 public void internalEntityDecl( String name, String value )
758 throws SAXException
759 {
760 try {
761 printer.enterDTD();
762 printer.printText( "<!ENTITY " );
763 printer.printText( name );
764 printer.printText( " \"" );
765 printEscaped( value );
766 printer.printText( "\">" );
767 if ( indenting )
768 printer.breakLine();
769 } catch ( IOException except ) {
770 throw new SAXException( except );
771 }
772 }
773
774
775 public void externalEntityDecl( String name, String publicId, String systemId )
776 throws SAXException
777 {
778 try {
779 printer.enterDTD();
780 unparsedEntityDecl( name, publicId, systemId, null );
781 } catch ( IOException except ) {
782 throw new SAXException( except );
783 }
784 }
785
786
787 public void unparsedEntityDecl( String name, String publicId,
788 String systemId, String notationName )
789 throws SAXException
790 {
791 try {
792 printer.enterDTD();
793 if ( publicId == null ) {
794 printer.printText( "<!ENTITY " );
795 printer.printText( name );
796 printer.printText( " SYSTEM " );
797 printDoctypeURL( systemId );
798 } else {
799 printer.printText( "<!ENTITY " );
800 printer.printText( name );
801 printer.printText( " PUBLIC " );
802 printDoctypeURL( publicId );
803 printer.printText( ' ' );
804 printDoctypeURL( systemId );
805 }
806 if ( notationName != null ) {
807 printer.printText( " NDATA " );
808 printer.printText( notationName );
809 }
810 printer.printText( '>' );
811 if ( indenting )
812 printer.breakLine();
813 } catch ( IOException except ) {
814 throw new SAXException( except );
815 }
816 }
817
818
819 public void notationDecl( String name, String publicId, String systemId )
820 throws SAXException
821 {
822 try {
823 printer.enterDTD();
824 if ( publicId != null ) {
825 printer.printText( "<!NOTATION " );
826 printer.printText( name );
827 printer.printText( " PUBLIC " );
828 printDoctypeURL( publicId );
829 if ( systemId != null ) {
830 printer.printText( ' ' );
831 printDoctypeURL( systemId );
832 }
833 } else {
834 printer.printText( "<!NOTATION " );
835 printer.printText( name );
836 printer.printText( " SYSTEM " );
837 printDoctypeURL( systemId );
838 }
839 printer.printText( '>' );
840 if ( indenting )
841 printer.breakLine();
842 } catch ( IOException except ) {
843 throw new SAXException( except );
844 }
845 }
846
847
848
849
850
851
852
853 /**
854 * Serialize the DOM node. This method is shared across XML, HTML and XHTML
855 * serializers and the differences are masked out in a separate {@link
856 * #serializeElement}.
857 *
858 * @param node The node to serialize
859 * @see #serializeElement
860 * @throws IOException An I/O exception occured while
861 * serializing
862 */
863 protected void serializeNode( Node node )
864 throws IOException
865 {
866
867
868
869 switch ( node.getNodeType() ) {
870 case Node.TEXT_NODE : {
871 String text;
872
873 text = node.getNodeValue();
874 if ( text != null )
875 if ( !indenting || getElementState().preserveSpace
876 || (text.replace('\n',' ').trim().length() != 0))
877 characters( text );
878 break;
879 }
880
881 case Node.CDATA_SECTION_NODE : {
882 String text;
883
884 text = node.getNodeValue();
885 if ( text != null ) {
886 startCDATA();
887 characters( text );
888 endCDATA();
889 }
890 break;
891 }
892
893 case Node.COMMENT_NODE : {
894 String text;
895
896 if ( ! format.getOmitComments() ) {
897 text = node.getNodeValue();
898 if ( text != null )
899 comment( text );
900 }
901 break;
902 }
903
904 case Node.ENTITY_REFERENCE_NODE : {
905 Node child;
906
907 endCDATA();
908 content();
909 child = node.getFirstChild();
910 while ( child != null ) {
911 serializeNode( child );
912 child = child.getNextSibling();
913 }
914 break;
915 }
916
917 case Node.PROCESSING_INSTRUCTION_NODE :
918 processingInstructionIO( node.getNodeName(), node.getNodeValue() );
919 break;
920
921 case Node.ELEMENT_NODE :
922 serializeElement( (Element) node );
923 break;
924
925 case Node.DOCUMENT_NODE : {
926 DocumentType docType;
927
928
929
930 docType = ( (Document) node ).getDoctype();
931 if (docType != null) {
932
933
934
935 ( (Document) node ).getImplementation();
936 try {
937 String internal;
938
939 printer.enterDTD();
940 docTypePublicId = docType.getPublicId();
941 docTypeSystemId = docType.getSystemId();
942 internal = docType.getInternalSubset();
943 if ( internal != null && internal.length() > 0 )
944 printer.printText( internal );
945 endDTD();
946 }
947
948 catch (NoSuchMethodError nsme) {
949 Class docTypeClass = docType.getClass();
950
951 String docTypePublicId = null;
952 String docTypeSystemId = null;
953 try {
954 java.lang.reflect.Method getPublicId = docTypeClass.getMethod("getPublicId", null);
955 if (getPublicId.getReturnType().equals(String.class)) {
956 docTypePublicId = (String)getPublicId.invoke(docType, null);
957 }
958 }
959 catch (Exception e) {
960
961 }
962 try {
963 java.lang.reflect.Method getSystemId = docTypeClass.getMethod("getSystemId", null);
964 if (getSystemId.getReturnType().equals(String.class)) {
965 docTypeSystemId = (String)getSystemId.invoke(docType, null);
966 }
967 }
968 catch (Exception e) {
969
970 }
971 this.printer.enterDTD();
972 this.docTypePublicId = docTypePublicId;
973 this.docTypeSystemId = docTypeSystemId;
974 endDTD();
975 }
976 }
977
978 }
979 case Node.DOCUMENT_FRAGMENT_NODE : {
980 Node child;
981
982
983
984
985 child = node.getFirstChild();
986 while ( child != null ) {
987 serializeNode( child );
988 child = child.getNextSibling();
989 }
990 break;
991 }
992
993 default:
994 break;
995 }
996 }
997
998
999 /**
1000 * Must be called by a method about to print any type of content.
1001 * If the element was just opened, the opening tag is closed and
1002 * will be matched to a closing tag. Returns the current element
1003 * state with <tt>empty</tt> and <tt>afterElement</tt> set to false.
1004 *
1005 * @return The current element state
1006 * @throws IOException An I/O exception occured while
1007 * serializing
1008 */
1009 protected ElementState content()
1010 throws IOException
1011 {
1012 ElementState state;
1013
1014 state = getElementState();
1015 if ( ! isDocumentState() ) {
1016
1017 if ( state.inCData && ! state.doCData ) {
1018 printer.printText( "]]>" );
1019 state.inCData = false;
1020 }
1021
1022
1023
1024 if ( state.empty ) {
1025 printer.printText( '>' );
1026 state.empty = false;
1027 }
1028
1029
1030
1031 state.afterElement = false;
1032
1033
1034
1035 state.afterComment = false;
1036 }
1037 return state;
1038 }
1039
1040
1041 /**
1042 * Called to print the text contents in the prevailing element format.
1043 * Since this method is capable of printing text as CDATA, it is used
1044 * for that purpose as well. White space handling is determined by the
1045 * current element state. In addition, the output format can dictate
1046 * whether the text is printed as CDATA or unescaped.
1047 *
1048 * @param text The text to print
1049 * @param unescaped True is should print unescaped
1050 * @throws IOException An I/O exception occured while
1051 * serializing
1052 */
1053 protected void characters( String text )
1054 throws IOException
1055 {
1056 ElementState state;
1057
1058 state = content();
1059
1060
1061
1062
1063 if ( state.inCData || state.doCData ) {
1064 StringBuffer buffer;
1065 int index;
1066 int saveIndent;
1067
1068
1069
1070
1071 buffer = new StringBuffer( text.length() );
1072 if ( ! state.inCData ) {
1073 buffer.append( "<![CDATA[" );
1074 state.inCData = true;
1075 }
1076 index = text.indexOf( "]]>" );
1077 while ( index >= 0 ) {
1078 buffer.append( text.substring( 0, index + 2 ) ).append( "]]><![CDATA[" );
1079 text = text.substring( index + 2 );
1080 index = text.indexOf( "]]>" );
1081 }
1082 buffer.append( text );
1083 saveIndent = printer.getNextIndent();
1084 printer.setNextIndent( 0 );
1085 printText( buffer.toString(), true, true );
1086 printer.setNextIndent( saveIndent );
1087
1088 } else {
1089
1090 int saveIndent;
1091
1092 if ( state.preserveSpace ) {
1093
1094
1095
1096
1097 saveIndent = printer.getNextIndent();
1098 printer.setNextIndent( 0 );
1099 printText( text, true, state.unescaped );
1100 printer.setNextIndent( saveIndent );
1101 } else {
1102 printText( text, false, state.unescaped );
1103 }
1104 }
1105 }
1106
1107
1108 /**
1109 * Returns the suitable entity reference for this character value,
1110 * or null if no such entity exists. Calling this method with <tt>'&'</tt>
1111 * will return <tt>"&amp;"</tt>.
1112 *
1113 * @param ch Character value
1114 * @return Character entity name, or null
1115 */
1116 protected abstract String getEntityRef( int ch );
1117
1118
1119 /**
1120 * Called to serializee the DOM element. The element is serialized based on
1121 * the serializer's method (XML, HTML, XHTML).
1122 *
1123 * @param elem The element to serialize
1124 * @throws IOException An I/O exception occured while
1125 * serializing
1126 */
1127 protected abstract void serializeElement( Element elem )
1128 throws IOException;
1129
1130
1131 /**
1132 * Comments and PIs cannot be serialized before the root element,
1133 * because the root element serializes the document type, which
1134 * generally comes first. Instead such PIs and comments are
1135 * accumulated inside a vector and serialized by calling this
1136 * method. Will be called when the root element is serialized
1137 * and when the document finished serializing.
1138 *
1139 * @throws IOException An I/O exception occured while
1140 * serializing
1141 */
1142 protected void serializePreRoot()
1143 throws IOException
1144 {
1145 int i;
1146
1147 if ( preRoot != null ) {
1148 for ( i = 0 ; i < preRoot.size() ; ++i ) {
1149 printText( (String) preRoot.elementAt( i ), true, true );
1150 if ( indenting )
1151 printer.breakLine();
1152 }
1153 preRoot.removeAllElements();
1154 }
1155 }
1156
1157
1158
1159
1160
1161
1162
1163 /**
1164 * Called to print additional text with whitespace handling.
1165 * If spaces are preserved, the text is printed as if by calling
1166 * {@link #printText(String)} with a call to {@link #breakLine}
1167 * for each new line. If spaces are not preserved, the text is
1168 * broken at space boundaries if longer than the line width;
1169 * Multiple spaces are printed as such, but spaces at beginning
1170 * of line are removed.
1171 *
1172 * @param text The text to print
1173 * @param preserveSpace Space preserving flag
1174 * @param unescaped Print unescaped
1175 */
1176 protected final void printText( char[] chars, int start, int length,
1177 boolean preserveSpace, boolean unescaped )
1178 throws IOException
1179 {
1180 char ch;
1181
1182 if ( preserveSpace ) {
1183
1184
1185
1186
1187 while ( length-- > 0 ) {
1188 ch = chars[ start ];
1189 ++start;
1190 if ( ch == '\n' || ch == '\r' || unescaped )
1191 printer.printText( ch );
1192 else
1193 printEscaped( ch );
1194 }
1195 } else {
1196
1197
1198
1199
1200
1201 while ( length-- > 0 ) {
1202 ch = chars[ start ];
1203 ++start;
1204 if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' )
1205 printer.printSpace();
1206 else if ( unescaped )
1207 printer.printText( ch );
1208 else
1209 printEscaped( ch );
1210 }
1211 }
1212 }
1213
1214
1215 protected final void printText( String text, boolean preserveSpace, boolean unescaped )
1216 throws IOException
1217 {
1218 int index;
1219 char ch;
1220
1221 if ( preserveSpace ) {
1222
1223
1224
1225
1226 for ( index = 0 ; index < text.length() ; ++index ) {
1227 ch = text.charAt( index );
1228 if ( ch == '\n' || ch == '\r' || unescaped )
1229 printer.printText( ch );
1230 else
1231 printEscaped( ch );
1232 }
1233 } else {
1234
1235
1236
1237
1238
1239 for ( index = 0 ; index < text.length() ; ++index ) {
1240 ch = text.charAt( index );
1241 if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' )
1242 printer.printSpace();
1243 else if ( unescaped )
1244 printer.printText( ch );
1245 else
1246 printEscaped( ch );
1247 }
1248 }
1249 }
1250
1251
1252 /**
1253 * Print a document type public or system identifier URL.
1254 * Encapsulates the URL in double quotes, escapes non-printing
1255 * characters and print it equivalent to {@link #printText}.
1256 *
1257 * @param url The document type url to print
1258 */
1259 protected void printDoctypeURL( String url )
1260 throws IOException
1261 {
1262 int i;
1263
1264 printer.printText( '"' );
1265 for( i = 0 ; i < url.length() ; ++i ) {
1266 if ( url.charAt( i ) == '"' || url.charAt( i ) < 0x20 || url.charAt( i ) > 0x7F ) {
1267 printer.printText( '%' );
1268 printer.printText( Integer.toHexString( url.charAt( i ) ) );
1269 } else
1270 printer.printText( url.charAt( i ) );
1271 }
1272 printer.printText( '"' );
1273 }
1274
1275
1276 protected void printEscaped( int ch )
1277 throws IOException
1278 {
1279 String charRef;
1280
1281
1282
1283
1284
1285 charRef = getEntityRef( ch );
1286 if ( charRef != null ) {
1287 printer.printText( '&' );
1288 printer.printText( charRef );
1289 printer.printText( ';' );
1290 } else if ( ( ch >= ' ' && encodingInfo.isPrintable(ch) && ch != 0xF7 ) ||
1291 ch == '\n' || ch == '\r' || ch == '\t' ) {
1292
1293
1294
1295 if (ch < 0x10000) {
1296 printer.printText((char)ch );
1297 } else {
1298 printer.printText((char)(((ch-0x10000)>>10)+0xd800));
1299 printer.printText((char)(((ch-0x10000)&0x3ff)+0xdc00));
1300 }
1301
1302 } else {
1303 printer.printText( "&#x" );
1304 printer.printText(Integer.toHexString(ch));
1305 printer.printText( ';' );
1306 }
1307 }
1308
1309
1310 /**
1311 * Escapes a string so it may be printed as text content or attribute
1312 * value. Non printable characters are escaped using character references.
1313 * Where the format specifies a deault entity reference, that reference
1314 * is used (e.g. <tt>&lt;</tt>).
1315 *
1316 * @param source The string to escape
1317 */
1318 protected void printEscaped( String source )
1319 throws IOException
1320 {
1321 for ( int i = 0 ; i < source.length() ; ++i ) {
1322 int ch = source.charAt(i);
1323 if ((ch & 0xfc00) == 0xd800 && i+1 < source.length()) {
1324 int lowch = source.charAt(i+1);
1325 if ((lowch & 0xfc00) == 0xdc00) {
1326 ch = 0x10000 + ((ch-0xd800)<<10) + lowch-0xdc00;
1327 i++;
1328 }
1329 }
1330 printEscaped(ch);
1331 }
1332 }
1333
1334
1335
1336
1337
1338
1339
1340 /**
1341 * Return the state of the current element.
1342 *
1343 * @return Current element state
1344 */
1345 protected ElementState getElementState()
1346 {
1347 return elementStates[ elementStateCount ];
1348 }
1349
1350
1351 /**
1352 * Enter a new element state for the specified element.
1353 * Tag name and space preserving is specified, element
1354 * state is initially empty.
1355 *
1356 * @return Current element state, or null
1357 */
1358 protected ElementState enterElementState( String namespaceURI, String localName,
1359 String rawName, boolean preserveSpace )
1360 {
1361 ElementState state;
1362
1363 if ( elementStateCount + 1 == elementStates.length ) {
1364 ElementState[] newStates;
1365
1366
1367
1368 newStates = new ElementState[ elementStates.length + 10 ];
1369 for ( int i = 0 ; i < elementStates.length ; ++i )
1370 newStates[ i ] = elementStates[ i ];
1371 for ( int i = elementStates.length ; i < newStates.length ; ++i )
1372 newStates[ i ] = new ElementState();
1373 elementStates = newStates;
1374 }
1375
1376 ++elementStateCount;
1377 state = elementStates[ elementStateCount ];
1378 state.namespaceURI = namespaceURI;
1379 state.localName = localName;
1380 state.rawName = rawName;
1381 state.preserveSpace = preserveSpace;
1382 state.empty = true;
1383 state.afterElement = false;
1384 state.afterComment = false;
1385 state.doCData = state.inCData = false;
1386 state.unescaped = false;
1387 state.prefixes = prefixes;
1388
1389 prefixes = null;
1390 return state;
1391 }
1392
1393
1394 /**
1395 * Leave the current element state and return to the
1396 * state of the parent element. If this was the root
1397 * element, return to the state of the document.
1398 *
1399 * @return Previous element state
1400 */
1401 protected ElementState leaveElementState()
1402 {
1403 if ( elementStateCount > 0 ) {
1404
1405 prefixes = null;
1406
1407 -- elementStateCount;
1408 return elementStates[ elementStateCount ];
1409 } else
1410 throw new IllegalStateException( "Internal error: element state is zero" );
1411 }
1412
1413
1414 /**
1415 * Returns true if in the state of the document.
1416 * Returns true before entering any element and after
1417 * leaving the root element.
1418 *
1419 * @return True if in the state of the document
1420 */
1421 protected boolean isDocumentState()
1422 {
1423 return elementStateCount == 0;
1424 }
1425
1426
1427 /**
1428 * Returns the namespace prefix for the specified URI.
1429 * If the URI has been mapped to a prefix, returns the
1430 * prefix, otherwise returns null.
1431 *
1432 * @param namespaceURI The namespace URI
1433 * @return The namespace prefix if known, or null
1434 */
1435 protected String getPrefix( String namespaceURI )
1436 {
1437 String prefix;
1438
1439 if ( prefixes != null ) {
1440 prefix = (String) prefixes.get( namespaceURI );
1441 if ( prefix != null )
1442 return prefix;
1443 }
1444 if ( elementStateCount == 0 )
1445 return null;
1446 else {
1447 for ( int i = elementStateCount ; i > 0 ; --i ) {
1448 if ( elementStates[ i ].prefixes != null ) {
1449 prefix = (String) elementStates[ i ].prefixes.get( namespaceURI );
1450 if ( prefix != null )
1451 return prefix;
1452 }
1453 }
1454 }
1455 return null;
1456 }
1457
1458
1459 }