View Javadoc

1   /*
2    * Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
3    *
4    * This software is open source.
5    * See the bottom of this file for the licence.
6    */
7   
8   package org.dom4j.io;
9   
/**
 * <p>
 * <code>OutputFormat</code> represents the format configuration used by
 * {@link XMLWriter} and its base classes to format the XML output.
 * </p>
 * 
 * <p>
 * Instances are plain mutable property holders; use
 * {@link #createPrettyPrint()} or {@link #createCompactFormat()} to obtain
 * commonly used presets.
 * </p>
 * 
 * @author <a href="mailto:james.strachan@metastuff.com">James Strachan </a>
 * @version $Revision: 1.17 $
 */
public class OutputFormat implements Cloneable {
    /** standard value to indent by, if we are indenting */
    protected static final String STANDARD_INDENT = "  ";

    /**
     * Whether or not to suppress the XML declaration - default is
     * <code>false</code>
     */
    private boolean suppressDeclaration = false;

    /**
     * Whether or not to print new line after the XML declaration - default is
     * <code>true</code>
     */
    private boolean newLineAfterDeclaration = true;

    /** The encoding format */
    private String encoding = "UTF-8";

    /**
     * Whether or not to omit the encoding from the XML declaration - default
     * is <code>false</code> (the encoding is written)
     */
    private boolean omitEncoding = false;

    /** The default indent is no spaces (as original document) */
    private String indent = null;

    /**
     * Whether or not to expand empty elements to
     * &lt;tagName&gt;&lt;/tagName&gt; - default is <code>false</code>
     */
    private boolean expandEmptyElements = false;

    /**
     * The default new line flag, set to do new lines only as in original
     * document
     */
    private boolean newlines = false;

    /** New line separator */
    private String lineSeparator = "\n";

    /**
     * Whether or not to trim whitespace in text nodes - default is
     * <code>false</code> (whitespace is preserved)
     */
    private boolean trimText = false;

    /** pad string-element boundaries with whitespace */
    private boolean padText = false;

    /** Whether or not to use XHTML standard. */
    private boolean doXHTML = false;

    /**
     * Controls when to output a line.separator every so many tags in case of
     * no lines and total text trimming.
     */
    private int newLineAfterNTags = 0; // zero means don't bother.

    /** Quote character to use when writing attributes. */
    private char attributeQuoteChar = '"';

    /**
     * Creates an <code>OutputFormat</code> with no additional whitespace
     * (indent or new lines) added. The whitespace from the element text content
     * is fully preserved.
     */
    public OutputFormat() {
    }

    /**
     * Creates an <code>OutputFormat</code> with the given indent added but no
     * new lines added. All whitespace from element text will be included.
     * 
     * @param indent
     *            is the indent string to be used for indentation (usually a
     *            number of spaces). As with {@link #setIndent(String)}, an
     *            empty string is treated as <code>null</code> (no
     *            indentation).
     */
    public OutputFormat(String indent) {
        this.indent = normalizeIndent(indent);
    }

    /**
     * Creates an <code>OutputFormat</code> with the given indent added with
     * optional newlines between the Elements. All whitespace from element text
     * will be included.
     * 
     * @param indent
     *            is the indent string to be used for indentation (usually a
     *            number of spaces). As with {@link #setIndent(String)}, an
     *            empty string is treated as <code>null</code> (no
     *            indentation).
     * @param newlines
     *            whether new lines are added to lay out the output
     */
    public OutputFormat(String indent, boolean newlines) {
        this.indent = normalizeIndent(indent);
        this.newlines = newlines;
    }

    /**
     * Creates an <code>OutputFormat</code> with the given indent added with
     * optional newlines between the Elements and the given encoding format.
     * 
     * @param indent
     *            is the indent string to be used for indentation (usually a
     *            number of spaces). As with {@link #setIndent(String)}, an
     *            empty string is treated as <code>null</code> (no
     *            indentation).
     * @param newlines
     *            whether new lines are added to lay out the output
     * @param encoding
     *            is the text encoding to use for writing the XML. As with
     *            {@link #setEncoding(String)}, <code>null</code> is ignored
     *            and the default (<code>UTF-8</code>) is kept.
     */
    public OutputFormat(String indent, boolean newlines, String encoding) {
        this.indent = normalizeIndent(indent);
        this.newlines = newlines;

        // Mirror setEncoding(): never let null overwrite the default encoding.
        if (encoding != null) {
            this.encoding = encoding;
        }
    }

    /**
     * Returns the new-line separator.
     * 
     * @return the line separator string; the default is <code>\n</code>
     */
    public String getLineSeparator() {
        return lineSeparator;
    }

    /**
     * <p>
     * This will set the new-line separator. The default is <code>\n</code>.
     * Note that if the "newlines" property is false, this value is irrelevant.
     * To make it output the system default line ending string, call
     * <code>setLineSeparator(System.getProperty("line.separator"))</code>
     * </p>
     * 
     * @param separator
     *            <code>String</code> line separator to use.
     * 
     * @see #setNewlines(boolean)
     */
    public void setLineSeparator(String separator) {
        lineSeparator = separator;
    }

    /**
     * Returns whether new lines should be printed.
     * 
     * @return the value of the "newlines" property
     */
    public boolean isNewlines() {
        return newlines;
    }

    /**
     * Sets whether new lines should be printed.
     * 
     * @param newlines
     *            <code>true</code> indicates new lines should be printed,
     *            else new lines are ignored (compacted).
     * 
     * @see #setLineSeparator(String)
     */
    public void setNewlines(boolean newlines) {
        this.newlines = newlines;
    }

    /**
     * Returns the encoding format.
     * 
     * @return the encoding name; the default is <code>UTF-8</code>
     */
    public String getEncoding() {
        return encoding;
    }

    /**
     * Sets the encoding format. A <code>null</code> argument is ignored and
     * the current encoding is kept.
     * 
     * @param encoding
     *            encoding format
     */
    public void setEncoding(String encoding) {
        if (encoding != null) {
            this.encoding = encoding;
        }
    }

    /**
     * Returns whether the encoding is omitted from the XML declaration.
     * 
     * @return the value of the "omitEncoding" property
     */
    public boolean isOmitEncoding() {
        return omitEncoding;
    }

    /**
     * <p>
     * This will set whether the XML declaration (<code>&lt;?xml version="1.0"
     * encoding="UTF-8"?&gt;</code>)
     * includes the encoding of the document. It is common to suppress this in
     * protocols such as WML and SOAP.
     * </p>
     * 
     * @param omitEncoding
     *            <code>boolean</code> indicating whether or not the document
     *            encoding should be omitted from the XML declaration.
     */
    public void setOmitEncoding(boolean omitEncoding) {
        this.omitEncoding = omitEncoding;
    }

    /**
     * <p>
     * This will set whether the XML declaration (<code>&lt;?xml version="1.0"
     * encoding="UTF-8"?&gt;</code>)
     * is included or not. It is common to suppress this in protocols such as
     * WML and SOAP.
     * </p>
     * 
     * @param suppressDeclaration
     *            <code>boolean</code> indicating whether or not the XML
     *            declaration should be suppressed.
     */
    public void setSuppressDeclaration(boolean suppressDeclaration) {
        this.suppressDeclaration = suppressDeclaration;
    }

    /**
     * Returns whether the XML declaration is suppressed.
     * 
     * @return true if the output of the XML declaration (<code>&lt;?xml
     *         version="1.0"?&gt;</code>)
     *         should be suppressed else false.
     */
    public boolean isSuppressDeclaration() {
        return suppressDeclaration;
    }

    /**
     * <p>
     * This will set whether a new line is printed after the XML declaration
     * (assuming it is not suppressed.)
     * </p>
     * 
     * @param newLineAfterDeclaration
     *            <code>boolean</code> indicating whether or not to print new
     *            line following the XML declaration. The default is true.
     */
    public void setNewLineAfterDeclaration(boolean newLineAfterDeclaration) {
        this.newLineAfterDeclaration = newLineAfterDeclaration;
    }

    /**
     * Returns whether a new line is printed after the XML declaration.
     * 
     * @return true if a new line should be printed following XML declaration
     */
    public boolean isNewLineAfterDeclaration() {
        return newLineAfterDeclaration;
    }

    /**
     * Returns whether empty elements are expanded.
     * 
     * @return the value of the "expandEmptyElements" property
     */
    public boolean isExpandEmptyElements() {
        return expandEmptyElements;
    }

    /**
     * <p>
     * This will set whether empty elements are expanded from
     * <code>&lt;tagName/&gt;</code> to
     * <code>&lt;tagName&gt;&lt;/tagName&gt;</code>.
     * </p>
     * 
     * @param expandEmptyElements
     *            <code>boolean</code> indicating whether or not empty
     *            elements should be expanded.
     */
    public void setExpandEmptyElements(boolean expandEmptyElements) {
        this.expandEmptyElements = expandEmptyElements;
    }

    /**
     * Returns whether text is trimmed.
     * 
     * @return the value of the "trimText" property
     */
    public boolean isTrimText() {
        return trimText;
    }

    /**
     * <p>
     * This will set whether the text is output verbatim (false) or with
     * whitespace stripped as per <code>{@link
     * org.dom4j.Element#getTextTrim()}</code>.
     * </p>
     * 
     * <p>
     * Default: false
     * </p>
     * 
     * @param trimText
     *            <code>boolean</code> true=&gt;trim the whitespace,
     *            false=&gt;use text verbatim
     */
    public void setTrimText(boolean trimText) {
        this.trimText = trimText;
    }

    /**
     * Returns whether string-element boundaries are padded.
     * 
     * @return the value of the "padText" property
     */
    public boolean isPadText() {
        return padText;
    }

    /**
     * <p>
     * Ensure that text immediately preceded by or followed by an element will
     * be "padded" with a single space. This is used to make
     * browser-friendly HTML, avoiding trimText's transformation of, e.g.,
     * <code>The quick &lt;b&gt;brown&lt;/b&gt; fox</code> into <code>The
     * quick&lt;b&gt;brown&lt;/b&gt;fox</code>
     * (the latter will run the three separate words together into a single
     * word). This setting is not too useful if you haven't also called
     * {@link #setTrimText}.
     * </p>
     * 
     * <p>
     * The padding string will only be added if the text itself starts or ends
     * with some whitespace characters.
     * </p>
     * 
     * <p>
     * Default: false
     * </p>
     * 
     * @param padText
     *            <code>boolean</code> if true, pad string-element boundaries
     */
    public void setPadText(boolean padText) {
        this.padText = padText;
    }

    /**
     * Returns the indent string.
     * 
     * @return the indent string, or <code>null</code> when no indentation
     *         is configured
     */
    public String getIndent() {
        return indent;
    }

    /**
     * <p>
     * This will set the indent <code>String</code> to use; this is usually a
     * <code>String</code> of empty spaces. If you pass null, or the empty
     * string (""), then no indentation will happen.
     * </p>
     * Default: none (null)
     * 
     * @param indent
     *            <code>String</code> to use for indentation.
     */
    public void setIndent(String indent) {
        this.indent = normalizeIndent(indent);
    }

    /**
     * Set the indent on or off. If setting on, will use the value of
     * STANDARD_INDENT, which is usually two spaces.
     * 
     * @param doIndent
     *            if true, set indenting on; if false, set indenting off
     */
    public void setIndent(boolean doIndent) {
        this.indent = doIndent ? STANDARD_INDENT : null;
    }

    /**
     * <p>
     * This will set the indent <code>String</code>'s size; an indentSize of
     * 4 would result in the indention being equivalent to the
     * <code>String</code> "&nbsp;&nbsp;&nbsp;&nbsp;" (four space characters).
     * A size of zero or less switches indentation off, i.e. the indent
     * becomes <code>null</code> exactly as with <code>setIndent("")</code>.
     * </p>
     * 
     * @param indentSize
     *            <code>int</code> number of spaces in indentation.
     */
    public void setIndentSize(int indentSize) {
        // Keep the "no indentation" state canonical (null, never ""), so
        // this setter agrees with setIndent(String).
        if (indentSize <= 0) {
            this.indent = null;

            return;
        }

        StringBuffer indentBuffer = new StringBuffer(indentSize);

        for (int i = 0; i < indentSize; i++) {
            indentBuffer.append(' ');
        }

        this.indent = indentBuffer.toString();
    }

    /**
     * <p>
     * Whether or not to use the XHTML standard: like HTML but passes an XML
     * parser with real, closed tags. Also, XHTML CDATA sections will be output
     * with the CDATA delimiters: ( &quot; <b>&lt;![CDATA[ </b>&quot; and &quot;
     * <b>]]&gt; </b>&quot; ) otherwise, the class HTMLWriter will output the
     * CDATA text, but not the delimiters.
     * </p>
     * 
     * <p>
     * Default is <code>false</code>
     * </p>
     * 
     * @return <code>true</code> if the XHTML standard is to be used
     */
    public boolean isXHTML() {
        return doXHTML;
    }

    /**
     * <p>
     * This will set whether or not to use the XHTML standard: like HTML but
     * passes an XML parser with real, closed tags. Also, XHTML CDATA sections
     * will be output with the CDATA delimiters: ( &quot; <b>&lt;![CDATA[
     * </b>&quot; and &quot; <b>]]&gt; </b>&quot; ) otherwise, the class
     * HTMLWriter will output the CDATA text, but not the delimiters.
     * </p>
     * 
     * <p>
     * Default: false
     * </p>
     * 
     * @param xhtml
     *            <code>boolean</code> true=&gt;conform to XHTML,
     *            false=&gt;conform to HTML, can have unclosed tags, etc.
     */
    public void setXHTML(boolean xhtml) {
        doXHTML = xhtml;
    }

    /**
     * Returns the number of tags after which a line separator is output.
     * 
     * @return the tag count; zero means no such line separators are output
     */
    public int getNewLineAfterNTags() {
        return newLineAfterNTags;
    }

    /**
     * Controls output of a line.separator every tagCount tags when isNewlines
     * is false. If tagCount equals zero, it means don't do anything special. If
     * greater than zero, then a line.separator will be output after tagCount
     * tags have been output. Used when you would like to squeeze the html as
     * much as possible, but some browsers don't like really long lines. A tag
     * count of 10 would produce a line.separator in the output after 10 close
     * tags (including single tags).
     * 
     * @param tagCount
     *            number of tags after which a line.separator is output, or
     *            zero to disable the feature
     */
    public void setNewLineAfterNTags(int tagCount) {
        newLineAfterNTags = tagCount;
    }

    /**
     * Returns the character used to quote attribute values.
     * 
     * @return the attribute quote character; the default is the double quote
     */
    public char getAttributeQuoteCharacter() {
        return attributeQuoteChar;
    }

    /**
     * Sets the character used to quote attribute values. The specified
     * character must be a valid XML attribute quote character, otherwise an
     * <code>IllegalArgumentException</code> will be thrown.
     * 
     * @param quoteChar
     *            The character to use when quoting attribute values.
     * 
     * @throws IllegalArgumentException
     *             If the specified character is not a valid XML attribute quote
     *             character.
     */
    public void setAttributeQuoteCharacter(char quoteChar) {
        if ((quoteChar == '\'') || (quoteChar == '"')) {
            attributeQuoteChar = quoteChar;
        } else {
            throw new IllegalArgumentException("Invalid attribute quote "
                    + "character (" + quoteChar + ")");
        }
    }

    /**
     * Parses command line arguments of the form <code>-omitEncoding
     * -indentSize 3 -newlines -trimText</code>
     * 
     * <p>
     * The options <code>-indent</code>, <code>-indentSize</code>,
     * <code>-encoding</code> and <code>-lineSeparator</code> consume the
     * following argument as their value. Parsing stops at the first argument
     * that is not a recognised option.
     * </p>
     * 
     * @param args
     *            is the array of command line arguments
     * @param i
     *            is the index in args to start parsing options
     * 
     * @return the index of first parameter that we didn't understand
     * 
     * @throws IllegalArgumentException
     *             if an option that requires a value is the last argument, or
     *             if the value of <code>-indentSize</code> is not an integer
     *             (reported as a <code>NumberFormatException</code>)
     */
    public int parseOptions(String[] args, int i) {
        for (int size = args.length; i < size; i++) {
            String option = args[i];

            if (option.equals("-suppressDeclaration")) {
                setSuppressDeclaration(true);
            } else if (option.equals("-omitEncoding")) {
                setOmitEncoding(true);
            } else if (option.equals("-indent")) {
                setIndent(optionValue(args, ++i));
            } else if (option.equals("-indentSize")) {
                setIndentSize(Integer.parseInt(optionValue(args, ++i)));
            } else if (option.startsWith("-expandEmpty")) {
                setExpandEmptyElements(true);
            } else if (option.equals("-encoding")) {
                setEncoding(optionValue(args, ++i));
            } else if (option.equals("-newlines")) {
                setNewlines(true);
            } else if (option.equals("-lineSeparator")) {
                setLineSeparator(optionValue(args, ++i));
            } else if (option.equals("-trimText")) {
                setTrimText(true);
            } else if (option.equals("-padText")) {
                setPadText(true);
            } else if (option.startsWith("-xhtml")) {
                setXHTML(true);
            } else {
                return i;
            }
        }

        return i;
    }

    /**
     * A static helper method to create the default pretty printing format. This
     * format consists of an indent of 2 spaces, newlines after each element and
     * all other whitespace trimmed (with text padding enabled), and XHTML is
     * false.
     * 
     * @return a new <code>OutputFormat</code> configured for pretty printing
     */
    public static OutputFormat createPrettyPrint() {
        OutputFormat format = new OutputFormat();
        format.setIndentSize(2);
        format.setNewlines(true);
        format.setTrimText(true);
        format.setPadText(true);

        return format;
    }

    /**
     * A static helper method to create the default compact format. This format
     * does not have any indentation or newlines after an element and all other
     * whitespace trimmed
     * 
     * @return a new <code>OutputFormat</code> configured for compact output
     */
    public static OutputFormat createCompactFormat() {
        OutputFormat format = new OutputFormat();
        format.setIndent(false);
        format.setNewlines(false);
        format.setTrimText(true);

        return format;
    }

    /**
     * Maps the empty string to <code>null</code> so that "no indentation" has
     * a single representation.
     */
    private static String normalizeIndent(String indent) {
        if ((indent != null) && (indent.length() <= 0)) {
            return null;
        }

        return indent;
    }

    /**
     * Returns the value argument at <code>valueIndex</code>, or fails with a
     * descriptive exception naming the option (the preceding argument) when
     * the argument list ends before the value.
     */
    private static String optionValue(String[] args, int valueIndex) {
        if (valueIndex >= args.length) {
            throw new IllegalArgumentException("Missing value for option "
                    + args[valueIndex - 1]);
        }

        return args[valueIndex];
    }
}
555 
556 /*
557  * Redistribution and use of this software and associated documentation
558  * ("Software"), with or without modification, are permitted provided that the
559  * following conditions are met:
560  * 
561  * 1. Redistributions of source code must retain copyright statements and
562  * notices. Redistributions must also contain a copy of this document.
563  * 
564  * 2. Redistributions in binary form must reproduce the above copyright notice,
565  * this list of conditions and the following disclaimer in the documentation
566  * and/or other materials provided with the distribution.
567  * 
568  * 3. The name "DOM4J" must not be used to endorse or promote products derived
569  * from this Software without prior written permission of MetaStuff, Ltd. For
570  * written permission, please contact dom4j-info@metastuff.com.
571  * 
572  * 4. Products derived from this Software may not be called "DOM4J" nor may
573  * "DOM4J" appear in their names without prior written permission of MetaStuff,
574  * Ltd. DOM4J is a registered trademark of MetaStuff, Ltd.
575  * 
576  * 5. Due credit should be given to the DOM4J Project - http://www.dom4j.org
577  * 
578  * THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS ``AS IS'' AND
579  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
580  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
581  * ARE DISCLAIMED. IN NO EVENT SHALL METASTUFF, LTD. OR ITS CONTRIBUTORS BE
582  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
583  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
584  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
585  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
586  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
587  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
588  * POSSIBILITY OF SUCH DAMAGE.
589  * 
590  * Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
591  */