View Javadoc

1   /*
2    * Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
3    *
4    * This software is open source.
5    * See the bottom of this file for the licence.
6    */
7   
8   package org.dom4j.io;
9   
10  import java.io.File;
11  import java.io.InputStream;
12  import java.io.Reader;
13  import java.net.URL;
14  import java.util.HashMap;
15  import java.util.Iterator;
16  import java.util.Map;
17  
18  import org.dom4j.Document;
19  import org.dom4j.DocumentException;
20  import org.dom4j.DocumentFactory;
21  
22  import org.xml.sax.InputSource;
23  import org.xml.sax.SAXException;
24  import org.xml.sax.XMLReader;
25  
26  /***
27   * The SAXModifier reads, modifies and writes XML documents using SAX.
28   * 
29   * <p>
30   * Registered {@link ElementModifier}objects can provide modifications to (part
31   * of) the xml tree, while the document is still being processed. This makes it
32   * possible to change large xml documents without having them in memory.
33   * </p>
34   * 
35   * <p>
36   * The modified document is written when the {@link XMLWriter}is specified.
37   * </p>
38   * 
39   * @author Wonne Keysers (Realsoftware.be)
40   * 
41   * @see org.dom4j.io.SAXReader
42   * @see org.dom4j.io.XMLWriter
43   */
44  public class SAXModifier {
45      private XMLWriter xmlWriter;
46  
47      private XMLReader xmlReader;
48  
49      private boolean pruneElements;
50  
51      private SAXModifyReader modifyReader;
52  
53      private HashMap modifiers = new HashMap();
54  
55      /***
56       * Creates a new modifier. <br>
57       * The XMLReader to parse the source will be created via the
58       * org.xml.sax.driver system property or JAXP if the system property is not
59       * set.
60       */
61      public SAXModifier() {
62      }
63  
64      /***
65       * Creates a new modifier. <br>
66       * The XMLReader to parse the source will be created via the
67       * org.xml.sax.driver system property or JAXP if the system property is not
68       * set.
69       * 
70       * @param pruneElements
71       *            Set to true when the modified document must NOT be kept in
72       *            memory.
73       */
74      public SAXModifier(boolean pruneElements) {
75          this.pruneElements = pruneElements;
76      }
77  
78      /***
79       * Creates a new modifier that will the specified {@link
80       * org.xml.sax.XMLReader} to parse the source.
81       * 
82       * @param xmlReader
83       *            The XMLReader to use
84       */
85      public SAXModifier(XMLReader xmlReader) {
86          this.xmlReader = xmlReader;
87      }
88  
89      /***
90       * Creates a new modifier that will the specified {@link
91       * org.xml.sax.XMLReader} to parse the source.
92       * 
93       * @param xmlReader
94       *            The XMLReader to use
95       * @param pruneElements
96       *            Set to true when the modified document must NOT be kept in
97       *            memory.
98       */
99      public SAXModifier(XMLReader xmlReader, boolean pruneElements) {
100         this.xmlReader = xmlReader;
101     }
102 
103     /***
104      * Reads a Document from the given {@link java.io.File}and writes it to the
105      * specified {@link XMLWriter}using SAX. Registered {@linkElementModifier}
106      * objects are invoked on the fly.
107      * 
108      * @param source
109      *            is the <code>File</code> to read from.
110      * 
111      * @return the newly created Document instance
112      * 
113      * @throws DocumentException
114      *             DocumentException org.dom4j.DocumentException} if an error
115      *             occurs during parsing.
116      */
117     public Document modify(File source) throws DocumentException {
118         try {
119             return installModifyReader().read(source);
120         } catch (SAXModifyException ex) {
121             Throwable cause = ex.getCause();
122             throw new DocumentException(cause.getMessage(), cause);
123         }
124     }
125 
126     /***
127      * Reads a Document from the given {@link org.xml.sax.InputSource}and
128      * writes it to the specified {@link XMLWriter}using SAX. Registered
129      * {@link ElementModifier}objects are invoked on the fly.
130      * 
131      * @param source
132      *            is the <code>org.xml.sax.InputSource</code> to read from.
133      * 
134      * @return the newly created Document instance
135      * 
136      * @throws DocumentException
137      *             DocumentException org.dom4j.DocumentException} if an error
138      *             occurs during parsing.
139      */
140     public Document modify(InputSource source) throws DocumentException {
141         try {
142             return installModifyReader().read(source);
143         } catch (SAXModifyException ex) {
144             Throwable cause = ex.getCause();
145             throw new DocumentException(cause.getMessage(), cause);
146         }
147     }
148 
149     /***
150      * Reads a Document from the given {@link java.io.InputStream}and writes it
151      * to the specified {@link XMLWriter}using SAX. Registered {@link
152      * ElementModifier} objects are invoked on the fly.
153      * 
154      * @param source
155      *            is the <code>java.io.InputStream</code> to read from.
156      * 
157      * @return the newly created Document instance
158      * 
159      * @throws DocumentException
160      *             DocumentException org.dom4j.DocumentException} if an error
161      *             occurs during parsing.
162      */
163     public Document modify(InputStream source) throws DocumentException {
164         try {
165             return installModifyReader().read(source);
166         } catch (SAXModifyException ex) {
167             Throwable cause = ex.getCause();
168             throw new DocumentException(cause.getMessage(), cause);
169         }
170     }
171 
172     /***
173      * Reads a Document from the given {@link java.io.InputStream}and writes it
174      * to the specified {@link XMLWriter}using SAX. Registered {@link
175      * ElementModifier} objects are invoked on the fly.
176      * 
177      * @param source
178      *            is the <code>java.io.InputStream</code> to read from.
179      * @param systemId
180      *            DOCUMENT ME!
181      * 
182      * @return the newly created Document instance
183      * 
184      * @throws DocumentException
185      *             DocumentException org.dom4j.DocumentException} if an error
186      *             occurs during parsing.
187      */
188     public Document modify(InputStream source, String systemId)
189             throws DocumentException {
190         try {
191             return installModifyReader().read(source);
192         } catch (SAXModifyException ex) {
193             Throwable cause = ex.getCause();
194             throw new DocumentException(cause.getMessage(), cause);
195         }
196     }
197 
198     /***
199      * Reads a Document from the given {@link java.io.Reader}and writes it to
200      * the specified {@link XMLWriter}using SAX. Registered {@link
201      * ElementModifier} objects are invoked on the fly.
202      * 
203      * @param source
204      *            is the <code>java.io.Reader</code> to read from.
205      * 
206      * @return the newly created Document instance
207      * 
208      * @throws DocumentException
209      *             DocumentException org.dom4j.DocumentException} if an error
210      *             occurs during parsing.
211      */
212     public Document modify(Reader source) throws DocumentException {
213         try {
214             return installModifyReader().read(source);
215         } catch (SAXModifyException ex) {
216             Throwable cause = ex.getCause();
217             throw new DocumentException(cause.getMessage(), cause);
218         }
219     }
220 
221     /***
222      * Reads a Document from the given {@link java.io.Reader}and writes it to
223      * the specified {@link XMLWriter}using SAX. Registered {@link
224      * ElementModifier} objects are invoked on the fly.
225      * 
226      * @param source
227      *            is the <code>java.io.Reader</code> to read from.
228      * @param systemId
229      *            DOCUMENT ME!
230      * 
231      * @return the newly created Document instance
232      * 
233      * @throws DocumentException
234      *             DocumentException org.dom4j.DocumentException} if an error
235      *             occurs during parsing.
236      */
237     public Document modify(Reader source, String systemId)
238             throws DocumentException {
239         try {
240             return installModifyReader().read(source);
241         } catch (SAXModifyException ex) {
242             Throwable cause = ex.getCause();
243             throw new DocumentException(cause.getMessage(), cause);
244         }
245     }
246 
247     /***
248      * Reads a Document from the given {@link java.net.URL}and writes it to the
249      * specified {@link XMLWriter}using SAX. Registered {@linkElementModifier}
250      * objects are invoked on the fly.
251      * 
252      * @param source
253      *            is the <code>java.net.URL</code> to read from.
254      * 
255      * @return the newly created Document instance
256      * 
257      * @throws DocumentException
258      *             DocumentException org.dom4j.DocumentException} if an error
259      *             occurs during parsing.
260      */
261     public Document modify(URL source) throws DocumentException {
262         try {
263             return installModifyReader().read(source);
264         } catch (SAXModifyException ex) {
265             Throwable cause = ex.getCause();
266             throw new DocumentException(cause.getMessage(), cause);
267         }
268     }
269 
270     /***
271      * Reads a Document from the given URL or filename and writes it to the
272      * specified {@link XMLWriter}using SAX. Registered {@linkElementModifier}
273      * objects are invoked on the fly.
274      * 
275      * @param source
276      *            is the URL or filename to read from.
277      * 
278      * @return the newly created Document instance
279      * 
280      * @throws DocumentException
281      *             DocumentException org.dom4j.DocumentException} if an error
282      *             occurs during parsing.
283      */
284     public Document modify(String source) throws DocumentException {
285         try {
286             return installModifyReader().read(source);
287         } catch (SAXModifyException ex) {
288             Throwable cause = ex.getCause();
289             throw new DocumentException(cause.getMessage(), cause);
290         }
291     }
292 
293     /***
294      * Adds the {@link ElementModifier}to be called when the specified element
295      * path is encounted while parsing the source.
296      * 
297      * @param path
298      *            The element path to be handled
299      * @param modifier
300      *            The {@link ElementModifier}to be called by the event based
301      *            processor.
302      */
303     public void addModifier(String path, ElementModifier modifier) {
304         this.modifiers.put(path, modifier);
305     }
306 
307     /***
308      * Removes all registered {@link ElementModifier}instances from the event
309      * based processor.
310      */
311     public void resetModifiers() {
312         this.modifiers.clear();
313         getSAXModifyReader().resetHandlers();
314     }
315 
316     /***
317      * Removes the {@link ElementModifier}from the event based processor, for
318      * the specified element path.
319      * 
320      * @param path
321      *            The path to remove the {@link ElementModifier}for.
322      */
323     public void removeModifier(String path) {
324         this.modifiers.remove(path);
325         getSAXModifyReader().removeHandler(path);
326     }
327 
328     /***
329      * Get the {@link org.dom4j.DocumentFactory}used to create the DOM4J
330      * document structure
331      * 
332      * @return <code>DocumentFactory</code> that will be used
333      */
334     public DocumentFactory getDocumentFactory() {
335         return getSAXModifyReader().getDocumentFactory();
336     }
337 
338     /***
339      * Sets the {@link org.dom4j.DocumentFactory}used to create the DOM4J
340      * document tree.
341      * 
342      * @param factory
343      *            <code>DocumentFactory</code> to be used
344      */
345     public void setDocumentFactory(DocumentFactory factory) {
346         getSAXModifyReader().setDocumentFactory(factory);
347     }
348 
349     /***
350      * Returns the current {@link XMLWriter}.
351      * 
352      * @return XMLWriter
353      */
354     public XMLWriter getXMLWriter() {
355         return this.xmlWriter;
356     }
357 
358     /***
359      * Sets the {@link XMLWriter}used to write the modified document.
360      * 
361      * @param writer
362      *            The writer to use.
363      */
364     public void setXMLWriter(XMLWriter writer) {
365         this.xmlWriter = writer;
366     }
367 
368     /***
369      * Returns true when xml elements are not kept in memory while parsing. The
370      * {@link org.dom4j.Document}returned by the modify methods will be null.
371      * 
372      * @return Returns the pruneElements.
373      */
374     public boolean isPruneElements() {
375         return pruneElements;
376     }
377 
378     private SAXReader installModifyReader() throws DocumentException {
379         try {
380             SAXModifyReader reader = getSAXModifyReader();
381 
382             if (isPruneElements()) {
383                 modifyReader.setDispatchHandler(new PruningDispatchHandler());
384             }
385 
386             reader.resetHandlers();
387 
388             Iterator modifierIt = this.modifiers.entrySet().iterator();
389 
390             while (modifierIt.hasNext()) {
391                 Map.Entry entry = (Map.Entry) modifierIt.next();
392 
393                 SAXModifyElementHandler handler = new SAXModifyElementHandler(
394                         (ElementModifier) entry.getValue());
395                 reader.addHandler((String) entry.getKey(), handler);
396             }
397 
398             reader.setXMLWriter(getXMLWriter());
399             reader.setXMLReader(getXMLReader());
400 
401             return reader;
402         } catch (SAXException ex) {
403             throw new DocumentException(ex.getMessage(), ex);
404         }
405     }
406 
407     private XMLReader getXMLReader() throws SAXException {
408         if (this.xmlReader == null) {
409             xmlReader = SAXHelper.createXMLReader(false);
410         }
411 
412         return this.xmlReader;
413     }
414 
415     private SAXModifyReader getSAXModifyReader() {
416         if (modifyReader == null) {
417             modifyReader = new SAXModifyReader();
418         }
419 
420         return modifyReader;
421     }
422 }
423 
424 /*
425  * Redistribution and use of this software and associated documentation
426  * ("Software"), with or without modification, are permitted provided that the
427  * following conditions are met:
428  * 
429  * 1. Redistributions of source code must retain copyright statements and
430  * notices. Redistributions must also contain a copy of this document.
431  * 
432  * 2. Redistributions in binary form must reproduce the above copyright notice,
433  * this list of conditions and the following disclaimer in the documentation
434  * and/or other materials provided with the distribution.
435  * 
436  * 3. The name "DOM4J" must not be used to endorse or promote products derived
437  * from this Software without prior written permission of MetaStuff, Ltd. For
438  * written permission, please contact dom4j-info@metastuff.com.
439  * 
440  * 4. Products derived from this Software may not be called "DOM4J" nor may
441  * "DOM4J" appear in their names without prior written permission of MetaStuff,
442  * Ltd. DOM4J is a registered trademark of MetaStuff, Ltd.
443  * 
444  * 5. Due credit should be given to the DOM4J Project - http://www.dom4j.org
445  * 
446  * THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS ``AS IS'' AND
447  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
448  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
449  * ARE DISCLAIMED. IN NO EVENT SHALL METASTUFF, LTD. OR ITS CONTRIBUTORS BE
450  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
451  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
452  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
453  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
454  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
455  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
456  * POSSIBILITY OF SUCH DAMAGE.
457  * 
458  * Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
459  */