1 /**
2 *
3 * Copyright 2006 The Apache Software Foundation
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18
19
20
21
22
23 package org.apache.geronimo.system.configuration;
24
25 import org.w3c.dom.Document;
26 import org.w3c.dom.DocumentType;
27 import org.w3c.dom.Node;
28 import org.w3c.dom.html.HTMLDocument;
29
30
31 /**
32 * Specifies an output format to control the serializer. Based on the
33 * XSLT specification for output format, plus additional parameters.
34 * Used to select the suitable serializer and determine how the
35 * document should be formatted on output.
36 * <p>
37 * The two interesting constructors are:
38 * <ul>
39 * <li>{@link #OutputFormat(String,String,boolean)} creates a format
40 * for the specified method (XML, HTML, Text, etc), encoding and indentation
41 * <li>{@link #OutputFormat(Document,String,boolean)} creates a format
42 * compatible with the document type (XML, HTML, Text, etc), encoding and
43 * indentation
44 * </ul>
45 *
46 *
47 * @version $Revision: 410741 $ $Date: 2006-05-31 21:35:48 -0700 (Wed, 31 May 2006) $
48 * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
49 * <a href="mailto:visco@intalio.com">Keith Visco</a>
50 * @see Serializer
51 * @see Method
52 */
53 public class OutputFormat
54 {
55
56
57 public static class DTD
58 {
59
60 /**
61 * Public identifier for HTML document type.
62 */
63 public static final String HTMLPublicId = "-//W3C//DTD HTML 4.0//EN";
64
65 /**
66 * System identifier for HTML document type.
67 */
68 public static final String HTMLSystemId =
69 "http://www.w3.org/TR/WD-html-in-xml/DTD/xhtml1-strict.dtd";
70
71 /**
72 * Public identifier for XHTML document type.
73 */
74 public static final String XHTMLPublicId =
75 "-//W3C//DTD XHTML 1.0 Strict//EN";
76
77 /**
78 * System identifier for XHTML document type.
79 */
80 public static final String XHTMLSystemId =
81 "http://www.w3.org/TR/WD-html-in-xml/DTD/xhtml1-strict.dtd";
82
83 }
84
85
86 public static class Defaults
87 {
88
89 /**
90 * If indentation is turned on, the default identation
91 * level is 4.
92 *
93 * @see #setIndenting(boolean)
94 */
95 public static final int Indent = 4;
96
97 /**
98 * The default encoding for Web documents it UTF-8.
99 *
100 * @see #getEncoding()
101 */
102 public static final String Encoding = "UTF-8";
103
104 /**
105 * The default line width at which to break long lines
106 * when identing. This is set to 72.
107 */
108 public static final int LineWidth = 72;
109
110 }
111
112
113 /**
114 * Holds the output method specified for this document,
115 * or null if no method was specified.
116 */
117 private String method;
118
119
120 /**
121 * Specifies the version of the output method.
122 */
123 private String version;
124
125
126 /**
127 * The indentation level, or zero if no indentation
128 * was requested.
129 */
130 private int indent = 0;
131
132
133 /**
134 * The encoding to use, if an input stream is used.
135 * The default is always UTF-8.
136 */
137 private String encoding = Defaults.Encoding;
138
139 /**
140 * The EncodingInfo instance for _encoding.
141 */
142 private EncodingInfo encodingInfo = null;
143
144 /**
145 * The specified media type or null.
146 */
147 private String mediaType;
148
149
150 /**
151 * The specified document type system identifier, or null.
152 */
153 private String doctypeSystem;
154
155
156 /**
157 * The specified document type public identifier, or null.
158 */
159 private String doctypePublic;
160
161
162 /**
163 * Ture if the XML declaration should be ommited;
164 */
165 private boolean omitXmlDeclaration = false;
166
167
168 /**
169 * Ture if the DOCTYPE declaration should be ommited;
170 */
171 private boolean omitDoctype = false;
172
173
174 /**
175 * Ture if comments should be ommited;
176 */
177 private boolean omitComments = false;
178
179
180 /**
181 * True if the document type should be marked as standalone.
182 */
183 private boolean standalone = false;
184
185
186 /**
187 * List of element tag names whose text node children must
188 * be output as CDATA.
189 */
190 private String[] cdataElements;
191
192
193 /**
194 * List of element tag names whose text node children must
195 * be output unescaped.
196 */
197 private String[] nonEscapingElements;
198
199
200 /**
201 * The selected line separator.
202 */
203 private String lineSeparator = "\n";
204
205
206 /**
207 * The line width at which to wrap long lines when indenting.
208 */
209 private int _lineWidth = Defaults.LineWidth;
210
211
212 /**
213 * True if spaces should be preserved in elements that do not
214 * specify otherwise, or specify the default behavior.
215 */
216 private boolean preserve = false;
217
218 /** If true, an empty string valued attribute is output as "". If false and
219 * and we are using the HTMLSerializer, then only the attribute name is
220 * serialized. Defaults to false for backwards compatibility.
221 */
222 private boolean preserveEmptyAttributes = false;
223
/**
 * Creates an output format in which every setting keeps its default
 * value.
 */
public OutputFormat()
{
    super();
}
230
231
/**
 * Creates an output format for an explicitly chosen method and
 * encoding. The three arguments are applied, in order, through
 * {@link #setMethod}, {@link #setEncoding(String)} and
 * {@link #setIndenting}; all other settings keep their defaults.
 *
 * @param method The output method
 * @param encoding The encoding
 * @param indenting True to switch on the default indentation and
 *   line width, false to switch both off
 */
public OutputFormat( String method, String encoding, boolean indenting )
{
    this.setMethod( method );
    this.setEncoding( encoding );
    this.setIndenting( indenting );
}
251
252
253 /**
254 * Constructs a new output format with the proper method,
255 * document type identifiers and media type for the specified
256 * document.
257 *
258 * @param doc The document to output
259 * @see #whichMethod
260 */
261 public OutputFormat( Document doc )
262 {
263 setMethod( whichMethod( doc ) );
264 setDoctype( whichDoctypePublic( doc ), whichDoctypeSystem( doc ) );
265 setMediaType( whichMediaType( getMethod() ) );
266 }
267
268
/**
 * Creates an output format derived from the given document, as
 * {@link #OutputFormat(Document)} does, and then applies the given
 * encoding and indentation choice.
 *
 * @param doc The document to output
 * @param encoding The encoding
 * @param indenting True to switch on the default indentation and
 *   line width, false to switch both off
 * @see #setEncoding(String)
 * @see #setIndenting
 */
public OutputFormat( Document doc, String encoding, boolean indenting )
{
    this( doc );
    this.setEncoding( encoding );
    this.setIndenting( indenting );
}
289
290
291 /**
292 * Returns the method specified for this output format.
293 * Typically the method will be <tt>xml</tt>, <tt>html</tt>
294 * or <tt>text</tt>, but it might be other values.
295 * If no method was specified, null will be returned
296 * and the most suitable method will be determined for
297 * the document by calling {@link #whichMethod}.
298 *
299 * @return The specified output method, or null
300 */
301 public String getMethod()
302 {
303 return method;
304 }
305
306
307 /**
308 * Sets the method for this output format.
309 *
310 * @see #getMethod
311 * @param method The output method, or null
312 */
313 public void setMethod( String method )
314 {
315 this.method = method;
316 }
317
318
319 /**
320 * Returns the version for this output method.
321 * If no version was specified, will return null
322 * and the default version number will be used.
323 * If the serializerr does not support that particular
324 * version, it should default to a supported version.
325 *
326 * @return The specified method version, or null
327 */
328 public String getVersion()
329 {
330 return version;
331 }
332
333
334 /**
335 * Sets the version for this output method.
336 * For XML the value would be "1.0", for HTML
337 * it would be "4.0".
338 *
339 * @see #getVersion
340 * @param version The output method version, or null
341 */
342 public void setVersion( String version )
343 {
344 this.version = version;
345 }
346
347
348 /**
349 * Returns the indentation specified. If no indentation
350 * was specified, zero is returned and the document
351 * should not be indented.
352 *
353 * @return The indentation or zero
354 * @see #setIndenting
355 */
356 public int getIndent()
357 {
358 return indent;
359 }
360
361
362 /**
363 * Returns true if indentation was specified.
364 */
365 public boolean getIndenting()
366 {
367 return ( indent > 0 );
368 }
369
370
371 /**
372 * Sets the indentation. The document will not be
373 * indented if the indentation is set to zero.
374 * Calling {@link #setIndenting} will reset this
375 * value to zero (off) or the default (on).
376 *
377 * @param indent The indentation, or zero
378 */
379 public void setIndent( int indent )
380 {
381 if ( indent < 0 )
382 this.indent = 0;
383 else
384 this.indent = indent;
385 }
386
387
388 /**
389 * Sets the indentation on and off. When set on, the default
390 * indentation level and default line wrapping is used
391 * (see {@link #DEFAULT_INDENT} and {@link #DEFAULT_LINE_WIDTH}).
392 * To specify a different indentation level or line wrapping,
393 * use {@link #setIndent} and {@link #setLineWidth}.
394 *
395 * @param on True if indentation should be on
396 */
397 public void setIndenting( boolean on )
398 {
399 if ( on ) {
400 indent = Defaults.Indent;
401 _lineWidth = Defaults.LineWidth;
402 } else {
403 indent = 0;
404 _lineWidth = 0;
405 }
406 }
407
408
409 /**
410 * Returns the specified encoding. If no encoding was
411 * specified, the default is always "UTF-8".
412 *
413 * @return The encoding
414 */
415 public String getEncoding()
416 {
417 return encoding;
418 }
419
420
421 /**
422 * Sets the encoding for this output method. If no
423 * encoding was specified, the default is always "UTF-8".
424 * Make sure the encoding is compatible with the one
425 * used by the {@link java.io.Writer}.
426 *
427 * @see #getEncoding
428 * @param encoding The encoding, or null
429 */
430 public void setEncoding( String encoding )
431 {
432 this.encoding = encoding;
433 encodingInfo = null;
434 }
435
436 /**
437 * Sets the encoding for this output method with an <code>EncodingInfo</code>
438 * instance.
439 */
440 public void setEncoding(EncodingInfo encInfo) {
441 encoding = encInfo.getName();
442 encodingInfo = encInfo;
443 }
444
445 /**
446 * Returns an <code>EncodingInfo<code> instance for the encoding.
447 *
448 * @see setEncoding
449 */
450 public EncodingInfo getEncodingInfo() {
451 if (encodingInfo == null)
452 encodingInfo = Encodings.getEncodingInfo(encoding);
453 return encodingInfo;
454 }
455
456 /**
457 * Returns the specified media type, or null.
458 * To determine the media type based on the
459 * document type, use {@link #whichMediaType}.
460 *
461 * @return The specified media type, or null
462 */
463 public String getMediaType()
464 {
465 return mediaType;
466 }
467
468
469 /**
470 * Sets the media type.
471 *
472 * @see #getMediaType
473 * @param mediaType The specified media type
474 */
475 public void setMediaType( String mediaType )
476 {
477 this.mediaType = mediaType;
478 }
479
480
481 /**
482 * Sets the document type public and system identifiers.
483 * Required only if the DOM Document or SAX events do not
484 * specify the document type, and one must be present in
485 * the serialized document. Any document type specified
486 * by the DOM Document or SAX events will override these
487 * values.
488 *
489 * @param publicId The public identifier, or null
490 * @param systemId The system identifier, or null
491 */
492 public void setDoctype( String publicId, String systemId )
493 {
494 doctypePublic = publicId;
495 doctypeSystem = systemId;
496 }
497
498
499 /**
500 * Returns the specified document type public identifier,
501 * or null.
502 */
503 public String getDoctypePublic()
504 {
505 return doctypePublic;
506 }
507
508
509 /**
510 * Returns the specified document type system identifier,
511 * or null.
512 */
513 public String getDoctypeSystem()
514 {
515 return doctypeSystem;
516 }
517
518
519 /**
520 * Returns true if comments should be ommited.
521 * The default is false.
522 */
523 public boolean getOmitComments()
524 {
525 return omitComments;
526 }
527
528
529 /**
530 * Sets comment omitting on and off.
531 *
532 * @param omit True if comments should be ommited
533 */
534 public void setOmitComments( boolean omit )
535 {
536 omitComments = omit;
537 }
538
539
540 /**
541 * Returns true if the DOCTYPE declaration should
542 * be ommited. The default is false.
543 */
544 public boolean getOmitDocumentType()
545 {
546 return omitDoctype;
547 }
548
549
550 /**
551 * Sets DOCTYPE declaration omitting on and off.
552 *
553 * @param omit True if DOCTYPE declaration should be ommited
554 */
555 public void setOmitDocumentType( boolean omit )
556 {
557 omitDoctype = omit;
558 }
559
560
561 /**
562 * Returns true if the XML document declaration should
563 * be ommited. The default is false.
564 */
565 public boolean getOmitXMLDeclaration()
566 {
567 return omitXmlDeclaration;
568 }
569
570
571 /**
572 * Sets XML declaration omitting on and off.
573 *
574 * @param omit True if XML declaration should be ommited
575 */
576 public void setOmitXMLDeclaration( boolean omit )
577 {
578 omitXmlDeclaration = omit;
579 }
580
581
582 /**
583 * Returns true if the document type is standalone.
584 * The default is false.
585 */
586 public boolean getStandalone()
587 {
588 return standalone;
589 }
590
591
592 /**
593 * Sets document DTD standalone. The public and system
594 * identifiers must be null for the document to be
595 * serialized as standalone.
596 *
597 * @param standalone True if document DTD is standalone
598 */
599 public void setStandalone( boolean standalone )
600 {
601 this.standalone = standalone;
602 }
603
604
605 /**
606 * Returns a list of all the elements whose text node children
607 * should be output as CDATA, or null if no such elements were
608 * specified.
609 */
610 public String[] getCDataElements()
611 {
612 return cdataElements;
613 }
614
615
616 /**
617 * Returns true if the text node children of the given elements
618 * should be output as CDATA.
619 *
620 * @param tagName The element's tag name
621 * @return True if should serialize as CDATA
622 */
623 public boolean isCDataElement( String tagName )
624 {
625 int i;
626
627 if ( cdataElements == null )
628 return false;
629 for ( i = 0 ; i < cdataElements.length ; ++i )
630 if ( cdataElements[ i ].equals( tagName ) )
631 return true;
632 return false;
633 }
634
635
636 /**
637 * Sets the list of elements for which text node children
638 * should be output as CDATA.
639 *
640 * @param cdataElements List of CDATA element tag names
641 */
642 public void setCDataElements( String[] cdataElements )
643 {
644 this.cdataElements = cdataElements;
645 }
646
647
648 /**
649 * Returns a list of all the elements whose text node children
650 * should be output unescaped (no character references), or null
651 * if no such elements were specified.
652 */
653 public String[] getNonEscapingElements()
654 {
655 return nonEscapingElements;
656 }
657
658
659 /**
660 * Returns true if the text node children of the given elements
661 * should be output unescaped.
662 *
663 * @param tagName The element's tag name
664 * @return True if should serialize unescaped
665 */
666 public boolean isNonEscapingElement( String tagName )
667 {
668 int i;
669
670 if ( nonEscapingElements == null )
671 return false;
672 for ( i = 0 ; i < nonEscapingElements.length ; ++i )
673 if ( nonEscapingElements[ i ].equals( tagName ) )
674 return true;
675 return false;
676 }
677
678
679 /**
680 * Sets the list of elements for which text node children
681 * should be output unescaped (no character references).
682 *
683 * @param nonEscapingElements List of unescaped element tag names
684 */
685 public void setNonEscapingElements( String[] nonEscapingElements )
686 {
687 this.nonEscapingElements = nonEscapingElements;
688 }
689
690
691
692 /**
693 * Returns a specific line separator to use. The default is the
694 * Web line separator (<tt>\n</tt>). A string is returned to
695 * support double codes (CR + LF).
696 *
697 * @return The specified line separator
698 */
699 public String getLineSeparator()
700 {
701 return lineSeparator;
702 }
703
704
705 /**
706 * Sets the line separator. The default is the Web line separator
707 * (<tt>\n</tt>). The machine's line separator can be obtained
708 * from the system property <tt>line.separator</tt>, but is only
709 * useful if the document is edited on machines of the same type.
710 * For general documents, use the Web line separator.
711 *
712 * @param lineSeparator The specified line separator
713 */
714 public void setLineSeparator( String lineSeparator )
715 {
716 if ( lineSeparator == null )
717 this.lineSeparator = "\n";
718 else
719 this.lineSeparator = lineSeparator;
720 }
721
722
723 /**
724 * Returns true if the default behavior for this format is to
725 * preserve spaces. All elements that do not specify otherwise
726 * or specify the default behavior will be formatted based on
727 * this rule. All elements that specify space preserving will
728 * always preserve space.
729 */
730 public boolean getPreserveSpace()
731 {
732 return preserve;
733 }
734
735
736 /**
737 * Sets space preserving as the default behavior. The default is
738 * space stripping and all elements that do not specify otherwise
739 * or use the default value will not preserve spaces.
740 *
741 * @param preserve True if spaces should be preserved
742 */
743 public void setPreserveSpace( boolean preserve )
744 {
745 this.preserve = preserve;
746 }
747
748
749 /**
750 * Return the selected line width for breaking up long lines.
751 * When indenting, and only when indenting, long lines will be
752 * broken at space boundaries based on this line width.
753 * No line wrapping occurs if this value is zero.
754 */
755 public int getLineWidth()
756 {
757 return _lineWidth;
758 }
759
760
761 /**
762 * Sets the line width. If zero then no line wrapping will
763 * occur. Calling {@link #setIndenting} will reset this
764 * value to zero (off) or the default (on).
765 *
766 * @param lineWidth The line width to use, zero for default
767 * @see #getLineWidth
768 * @see #setIndenting
769 */
770 public void setLineWidth( int lineWidth )
771 {
772 if ( lineWidth <= 0 )
773 _lineWidth = 0;
774 else
775 _lineWidth = lineWidth;
776 }
777
778 /**
779 * Returns the preserveEmptyAttribute flag. If flag is false, then'
780 * attributes with empty string values are output as the attribute
781 * name only (in HTML mode).
782 * @return preserve the preserve flag
783 */
784 public boolean getPreserveEmptyAttributes () {
785 return preserveEmptyAttributes;
786 }
787 /**
788 * Sets the preserveEmptyAttribute flag. If flag is false, then'
789 * attributes with empty string values are output as the attribute
790 * name only (in HTML mode).
791 * @param preserve the preserve flag
792 */
793 public void setPreserveEmptyAttributes (boolean preserve) {
794 preserveEmptyAttributes = preserve;
795 }
796
797 /**
798 * Returns the last printable character based on the selected
799 * encoding. Control characters and non-printable characters
800 * are always printed as character references.
801 */
802 public char getLastPrintable()
803 {
804 if ( getEncoding() != null &&
805 ( getEncoding().equalsIgnoreCase( "ASCII" ) ) )
806 return 0xFF;
807 else
808 return 0xFFFF;
809 }
810
811
812 /**
813 * Determine the output method for the specified document.
814 * If the document is an instance of {@link org.w3c.dom.html.HTMLDocument}
815 * then the method is said to be <tt>html</tt>. If the root
816 * element is 'html' and all text nodes preceding the root
817 * element are all whitespace, then the method is said to be
818 * <tt>html</tt>. Otherwise the method is <tt>xml</tt>.
819 *
820 * @param doc The document to check
821 * @return The suitable method
822 */
823 public static String whichMethod( Document doc )
824 {
825 Node node;
826 String value;
827 int i;
828
829
830
831 if ( doc instanceof HTMLDocument )
832 return Method.HTML;
833
834
835
836
837
838
839
840 node = doc.getFirstChild();
841 while (node != null) {
842
843 if ( node.getNodeType() == Node.ELEMENT_NODE ) {
844 if ( node.getNodeName().equalsIgnoreCase( "html" ) ) {
845 return Method.HTML;
846 } else if ( node.getNodeName().equalsIgnoreCase( "root" ) ) {
847 return Method.FOP;
848 } else {
849 return Method.XML;
850 }
851 } else if ( node.getNodeType() == Node.TEXT_NODE ) {
852
853
854
855 value = node.getNodeValue();
856 for ( i = 0 ; i < value.length() ; ++i )
857 if ( value.charAt( i ) != 0x20 && value.charAt( i ) != 0x0A &&
858 value.charAt( i ) != 0x09 && value.charAt( i ) != 0x0D )
859 return Method.XML;
860 }
861 node = node.getNextSibling();
862 }
863
864 return Method.XML;
865 }
866
867
868 /**
869 * Returns the document type public identifier
870 * specified for this document, or null.
871 */
872 public static String whichDoctypePublic( Document doc )
873 {
874 DocumentType doctype;
875
876
877 doctype = doc.getDoctype();
878 if ( doctype != null ) {
879
880
881 try {
882 return doctype.getPublicId();
883 } catch ( Error except ) { }
884 }
885
886 if ( doc instanceof HTMLDocument )
887 return DTD.XHTMLPublicId;
888 return null;
889 }
890
891
892 /**
893 * Returns the document type system identifier
894 * specified for this document, or null.
895 */
896 public static String whichDoctypeSystem( Document doc )
897 {
898 DocumentType doctype;
899
900
901 doctype = doc.getDoctype();
902 if ( doctype != null ) {
903
904
905 try {
906 return doctype.getSystemId();
907 } catch ( Error except ) { }
908 }
909
910 if ( doc instanceof HTMLDocument )
911 return DTD.XHTMLSystemId;
912 return null;
913 }
914
915
916 /**
917 * Returns the suitable media format for a document
918 * output with the specified method.
919 */
920 public static String whichMediaType( String method )
921 {
922 if ( method.equalsIgnoreCase( Method.XML ) )
923 return "text/xml";
924 if ( method.equalsIgnoreCase( Method.HTML ) )
925 return "text/html";
926 if ( method.equalsIgnoreCase( Method.XHTML ) )
927 return "text/html";
928 if ( method.equalsIgnoreCase( Method.TEXT ) )
929 return "text/plain";
930 if ( method.equalsIgnoreCase( Method.FOP ) )
931 return "application/pdf";
932 return null;
933 }
934
935
936 }
937