View Javadoc

1   /*
2    * Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
3    *
4    * This software is open source.
5    * See the bottom of this file for the licence.
6    */
7   
8   package org.dom4j.io;
9   
10  import java.io.InputStream;
11  import java.io.Reader;
12  import java.util.Iterator;
13  
14  import javax.xml.namespace.QName;
15  import javax.xml.stream.XMLEventReader;
16  import javax.xml.stream.XMLInputFactory;
17  import javax.xml.stream.XMLStreamConstants;
18  import javax.xml.stream.XMLStreamException;
19  import javax.xml.stream.events.Attribute;
20  import javax.xml.stream.events.Characters;
21  import javax.xml.stream.events.Comment;
22  import javax.xml.stream.events.EndElement;
23  import javax.xml.stream.events.EntityReference;
24  import javax.xml.stream.events.Namespace;
25  import javax.xml.stream.events.ProcessingInstruction;
26  import javax.xml.stream.events.StartDocument;
27  import javax.xml.stream.events.StartElement;
28  import javax.xml.stream.events.XMLEvent;
29  
30  import org.dom4j.CharacterData;
31  import org.dom4j.Document;
32  import org.dom4j.DocumentFactory;
33  import org.dom4j.Element;
34  import org.dom4j.Entity;
35  import org.dom4j.Node;
36  
37  /***
38   * Reads a DOM4J {@link Document}, as well as other {@link Node}s, from a StAX
39   * {@link XMLEventReader}.
40   * 
41   * @author Christian Niles
42   */
43  public class STAXEventReader {
44      /*** Reference to the DocumentFactory used to build DOM4J nodes. */
45      private DocumentFactory factory;
46  
47      /*** A StAX input factory, used to construct streams from IO streams. */
48      private XMLInputFactory inputFactory = XMLInputFactory.newInstance();
49  
50      /***
51       * Constructs a default <code>STAXEventReader</code> instance with a
52       * default {@link DocumentFactory}.
53       */
54      public STAXEventReader() {
55          this.factory = DocumentFactory.getInstance();
56      }
57  
58      /***
59       * Constructs a <code>STAXEventReader</code> instance that uses the
60       * specified {@link DocumentFactory}to construct DOM4J {@link Node}s.
61       * 
62       * @param factory
63       *            The DocumentFactory to use when constructing DOM4J nodes, or
64       *            <code>null</code> if a default should be used.
65       */
66      public STAXEventReader(DocumentFactory factory) {
67          if (factory != null) {
68              this.factory = factory;
69          } else {
70              this.factory = DocumentFactory.getInstance();
71          }
72      }
73  
74      /***
75       * Sets the DocumentFactory to be used when constructing DOM4J nodes.
76       * 
77       * @param documentFactory
78       *            The DocumentFactory to use when constructing DOM4J nodes, or
79       *            <code>null</code> if a default should be used.
80       */
81      public void setDocumentFactory(DocumentFactory documentFactory) {
82          if (documentFactory != null) {
83              this.factory = documentFactory;
84          } else {
85              this.factory = DocumentFactory.getInstance();
86          }
87      }
88  
89      /***
90       * Constructs a StAX event stream from the provided I/O stream and reads a
91       * DOM4J document from it.
92       * 
93       * @param is
94       *            The I/O stream from which the Document will be read.
95       * 
96       * @return The Document that was read from the stream.
97       * 
98       * @throws XMLStreamException
99       *             If an error occurs reading content from the stream.
100      */
101     public Document readDocument(InputStream is) throws XMLStreamException {
102         return readDocument(is, null);
103     }
104 
105     /***
106      * Constructs a StAX event stream from the provided I/O character stream and
107      * reads a DOM4J document from it.
108      * 
109      * @param reader
110      *            The character stream from which the Document will be read.
111      * 
112      * @return The Document that was read from the stream.
113      * 
114      * @throws XMLStreamException
115      *             If an error occurs reading content from the stream.
116      */
117     public Document readDocument(Reader reader) throws XMLStreamException {
118         return readDocument(reader, null);
119     }
120 
121     /***
122      * Constructs a StAX event stream from the provided I/O stream and reads a
123      * DOM4J document from it.
124      * 
125      * @param is
126      *            The I/O stream from which the Document will be read.
127      * @param systemId
128      *            A system id used to resolve entities.
129      * 
130      * @return The Document that was read from the stream.
131      * 
132      * @throws XMLStreamException
133      *             If an error occurs reading content from the stream.
134      */
135     public Document readDocument(InputStream is, String systemId)
136             throws XMLStreamException {
137         XMLEventReader eventReader = inputFactory.createXMLEventReader(
138                 systemId, is);
139 
140         try {
141             return readDocument(eventReader);
142         } finally {
143             eventReader.close();
144         }
145     }
146 
147     /***
148      * Constructs a StAX event stream from the provided I/O character stream and
149      * reads a DOM4J document from it.
150      * 
151      * @param reader
152      *            The character stream from which the Document will be read.
153      * @param systemId
154      *            A system id used to resolve entities.
155      * 
156      * @return The Document that was read from the stream.
157      * 
158      * @throws XMLStreamException
159      *             If an error occurs reading content from the stream.
160      */
161     public Document readDocument(Reader reader, String systemId)
162             throws XMLStreamException {
163         XMLEventReader eventReader = inputFactory.createXMLEventReader(
164                 systemId, reader);
165 
166         try {
167             return readDocument(eventReader);
168         } finally {
169             eventReader.close();
170         }
171     }
172 
173     /***
174      * Reads a {@link Node}from the event stream. If the next event is a
175      * {@link StartElement}, all events until the closing {@link EndElement}
176      * will be read, and the resulting nodes will be added to the returned
177      * {@link Element}.
178      * 
179      * <p>
180      * <strong>Pre-Conditions </strong>: The stream must be positioned before an
181      * event other than an <code>EndElement</code>,<code>EndDocument</code>,
182      * or any DTD-related events, which are not currently supported.
183      * </p>
184      * 
185      * @param reader
186      *            The reader from which events will be read.
187      * 
188      * @return A DOM4J {@link Node}constructed from the read events.
189      * 
190      * @throws XMLStreamException
191      *             If an error occurs reading from the stream, or the stream was
192      *             positioned before an unsupported event.
193      */
194     public Node readNode(XMLEventReader reader) throws XMLStreamException {
195         XMLEvent event = reader.peek();
196 
197         if (event.isStartElement()) {
198             return readElement(reader);
199         } else if (event.isCharacters()) {
200             return readCharacters(reader);
201         } else if (event.isStartDocument()) {
202             return readDocument(reader);
203         } else if (event.isProcessingInstruction()) {
204             return readProcessingInstruction(reader);
205         } else if (event.isEntityReference()) {
206             return readEntityReference(reader);
207         } else if (event.isAttribute()) {
208             return readAttribute(reader);
209         } else if (event.isNamespace()) {
210             return readNamespace(reader);
211         } else {
212             throw new XMLStreamException("Unsupported event: " + event);
213         }
214     }
215 
216     /***
217      * Reads a DOM4J {@link Document}from the provided stream. The stream
218      * should be positioned at the start of a document, or before a {@link
219      * StartElement} event.
220      * 
221      * @param reader
222      *            The event stream from which to read the {@link Document}.
223      * 
224      * @return The {@link Document}that was read from the stream.
225      * 
226      * @throws XMLStreamException
227      *             If an error occurs reading events from the stream.
228      */
229     public Document readDocument(XMLEventReader reader)
230             throws XMLStreamException {
231         Document doc = null;
232 
233         while (reader.hasNext()) {
234             XMLEvent nextEvent = reader.peek();
235             int type = nextEvent.getEventType();
236 
237             switch (type) {
238                 case XMLStreamConstants.START_DOCUMENT:
239 
240                     StartDocument event = (StartDocument) reader.nextEvent();
241 
242                     if (doc == null) {
243                         // create document
244                         if (event.encodingSet()) {
245                             String encodingScheme = event
246                                     .getCharacterEncodingScheme();
247                             doc = factory.createDocument(encodingScheme);
248                         } else {
249                             doc = factory.createDocument();
250                         }
251                     } else {
252                         // duplicate or misplaced xml declaration
253                         String msg = "Unexpected StartDocument event";
254                         throw new XMLStreamException(msg, event.getLocation());
255                     }
256 
257                     break;
258 
259                 case XMLStreamConstants.END_DOCUMENT:
260                 case XMLStreamConstants.SPACE:
261                 case XMLStreamConstants.CHARACTERS:
262 
263                     // skip end document and space outside the root element
264                     reader.nextEvent();
265 
266                     break;
267 
268                 default:
269 
270                     if (doc == null) {
271                         // create document
272                         doc = factory.createDocument();
273                     }
274 
275                     Node n = readNode(reader);
276                     doc.add(n);
277             }
278         }
279 
280         return doc;
281     }
282 
283     /***
284      * Reads a DOM4J Element from the provided event stream. The stream must be
285      * positioned before an {@link StartElement}event. In addition to the
286      * initial start event, all events up to and including the closing {@link
287      * EndElement} will be read, and included with the returned element.
288      * 
289      * @param eventReader
290      *            The event stream from which to read the Element.
291      * 
292      * @return The Element that was read from the stream.
293      * 
294      * @throws XMLStreamException
295      *             If an error occured reading events from the stream, or the
296      *             stream was not positioned before a {@linkStartElement}event.
297      */
298     public Element readElement(XMLEventReader eventReader)
299             throws XMLStreamException {
300         XMLEvent event = eventReader.peek();
301 
302         if (event.isStartElement()) {
303             // advance the reader and get the StartElement event
304             StartElement startTag = eventReader.nextEvent().asStartElement();
305             Element elem = createElement(startTag);
306 
307             // read element content
308             while (true) {
309                 if (!eventReader.hasNext()) {
310                     String msg = "Unexpected end of stream while reading"
311                             + " element content";
312                     throw new XMLStreamException(msg);
313                 }
314 
315                 XMLEvent nextEvent = eventReader.peek();
316 
317                 if (nextEvent.isEndElement()) {
318                     EndElement endElem = eventReader.nextEvent().asEndElement();
319 
320                     if (!endElem.getName().equals(startTag.getName())) {
321                         throw new XMLStreamException("Expected "
322                                 + startTag.getName() + " end-tag, but found"
323                                 + endElem.getName());
324                     }
325 
326                     break;
327                 }
328 
329                 Node child = readNode(eventReader);
330                 elem.add(child);
331             }
332 
333             return elem;
334         } else {
335             throw new XMLStreamException("Expected Element event, found: "
336                     + event);
337         }
338     }
339 
340     /***
341      * Constructs a DOM4J Attribute from the provided event stream. The stream
342      * must be positioned before an {@link Attribute}event.
343      * 
344      * @param reader
345      *            The event stream from which to read the Attribute.
346      * 
347      * @return The Attribute that was read from the stream.
348      * 
349      * @throws XMLStreamException
350      *             If an error occured reading events from the stream, or the
351      *             stream was not positioned before an {@linkAttribute}event.
352      */
353     public org.dom4j.Attribute readAttribute(XMLEventReader reader)
354             throws XMLStreamException {
355         XMLEvent event = reader.peek();
356 
357         if (event.isAttribute()) {
358             Attribute attr = (Attribute) reader.nextEvent();
359 
360             return createAttribute(null, attr);
361         } else {
362             throw new XMLStreamException("Expected Attribute event, found: "
363                     + event);
364         }
365     }
366 
367     /***
368      * Constructs a DOM4J Namespace from the provided event stream. The stream
369      * must be positioned before a {@link Namespace}event.
370      * 
371      * @param reader
372      *            The event stream from which to read the Namespace.
373      * 
374      * @return The Namespace that was read from the stream.
375      * 
376      * @throws XMLStreamException
377      *             If an error occured reading events from the stream, or the
378      *             stream was not positioned before a {@linkNamespace}event.
379      */
380     public org.dom4j.Namespace readNamespace(XMLEventReader reader)
381             throws XMLStreamException {
382         XMLEvent event = reader.peek();
383 
384         if (event.isNamespace()) {
385             Namespace ns = (Namespace) reader.nextEvent();
386 
387             return createNamespace(ns);
388         } else {
389             throw new XMLStreamException("Expected Namespace event, found: "
390                     + event);
391         }
392     }
393 
394     /***
395      * Constructs a DOM4J Text or CDATA section from the provided event stream.
396      * The stream must be positioned before a {@link Characters}event.
397      * 
398      * @param reader
399      *            The event stream from which to read the Text or CDATA.
400      * 
401      * @return The Text or CDATA that was read from the stream.
402      * 
403      * @throws XMLStreamException
404      *             If an error occured reading events from the stream, or the
405      *             stream was not positioned before a {@linkCharacters}event.
406      */
407     public CharacterData readCharacters(XMLEventReader reader)
408             throws XMLStreamException {
409         XMLEvent event = reader.peek();
410 
411         if (event.isCharacters()) {
412             Characters characters = reader.nextEvent().asCharacters();
413 
414             return createCharacterData(characters);
415         } else {
416             throw new XMLStreamException("Expected Characters event, found: "
417                     + event);
418         }
419     }
420 
421     /***
422      * Constructs a DOM4J Comment from the provided event stream. The stream
423      * must be positioned before a {@link Comment}event.
424      * 
425      * @param reader
426      *            The event stream from which to read the Comment.
427      * 
428      * @return The Comment that was read from the stream.
429      * 
430      * @throws XMLStreamException
431      *             If an error occured reading events from the stream, or the
432      *             stream was not positioned before a {@linkComment}event.
433      */
434     public org.dom4j.Comment readComment(XMLEventReader reader)
435             throws XMLStreamException {
436         XMLEvent event = reader.peek();
437 
438         if (event instanceof Comment) {
439             return createComment((Comment) reader.nextEvent());
440         } else {
441             throw new XMLStreamException("Expected Comment event, found: "
442                     + event);
443         }
444     }
445 
446     /***
447      * Constructs a DOM4J Entity from the provided event stream. The stream must
448      * be positioned before an {@link EntityReference}event.
449      * 
450      * @param reader
451      *            The event stream from which to read the {@link
452      *            EntityReference}.
453      * 
454      * @return The {@link org.dom4j.Entity}that was read from the stream.
455      * 
456      * @throws XMLStreamException
457      *             If an error occured reading events from the stream, or the
458      *             stream was not positioned before an {@linkEntityReference}
459      *             event.
460      */
461     public Entity readEntityReference(XMLEventReader reader)
462             throws XMLStreamException {
463         XMLEvent event = reader.peek();
464 
465         if (event.isEntityReference()) {
466             EntityReference entityRef = (EntityReference) reader.nextEvent();
467 
468             return createEntity(entityRef);
469         } else {
470             throw new XMLStreamException("Expected EntityRef event, found: "
471                     + event);
472         }
473     }
474 
475     /***
476      * Constructs a DOM4J ProcessingInstruction from the provided event stream.
477      * The stream must be positioned before a {@link ProcessingInstruction}
478      * event.
479      * 
480      * @param reader
481      *            The event stream from which to read the ProcessingInstruction.
482      * 
483      * @return The ProcessingInstruction that was read from the stream.
484      * 
485      * @throws XMLStreamException
486      *             If an error occured reading events from the stream, or the
487      *             stream was not positioned before a {@link
488      *             ProcessingInstruction} event.
489      */
490     public org.dom4j.ProcessingInstruction readProcessingInstruction(
491             XMLEventReader reader) throws XMLStreamException {
492         XMLEvent event = reader.peek();
493 
494         if (event.isProcessingInstruction()) {
495             ProcessingInstruction pi = (ProcessingInstruction) reader
496                     .nextEvent();
497 
498             return createProcessingInstruction(pi);
499         } else {
500             throw new XMLStreamException("Expected PI event, found: " + event);
501         }
502     }
503 
504     /***
505      * Constructs a new DOM4J Element from the provided StartElement event. All
506      * attributes and namespaces will be added to the returned element.
507      * 
508      * @param startEvent
509      *            The StartElement event from which to construct the new DOM4J
510      *            Element.
511      * 
512      * @return The Element constructed from the provided StartElement event.
513      */
514     public Element createElement(StartElement startEvent) {
515         QName qname = startEvent.getName();
516         org.dom4j.QName elemName = createQName(qname);
517 
518         Element elem = factory.createElement(elemName);
519 
520         // create attributes
521         for (Iterator i = startEvent.getAttributes(); i.hasNext();) {
522             Attribute attr = (Attribute) i.next();
523             elem.addAttribute(createQName(attr.getName()), attr.getValue());
524         }
525 
526         // create namespaces
527         for (Iterator i = startEvent.getNamespaces(); i.hasNext();) {
528             Namespace ns = (Namespace) i.next();
529             elem.addNamespace(ns.getPrefix(), ns.getNamespaceURI());
530         }
531 
532         return elem;
533     }
534 
535     /***
536      * Constructs a new DOM4J Attribute from the provided StAX Attribute event.
537      * 
538      * @param elem
539      *            DOCUMENT ME!
540      * @param attr
541      *            The Attribute event from which to construct the new DOM4J
542      *            Attribute.
543      * 
544      * @return The Attribute constructed from the provided Attribute event.
545      */
546     public org.dom4j.Attribute createAttribute(Element elem, Attribute attr) {
547         return factory.createAttribute(elem, createQName(attr.getName()), attr
548                 .getValue());
549     }
550 
551     /***
552      * Constructs a new DOM4J Namespace from the provided StAX Namespace event.
553      * 
554      * @param ns
555      *            The Namespace event from which to construct the new DOM4J
556      *            Namespace.
557      * 
558      * @return The Namespace constructed from the provided Namespace event.
559      */
560     public org.dom4j.Namespace createNamespace(Namespace ns) {
561         return factory.createNamespace(ns.getPrefix(), ns.getNamespaceURI());
562     }
563 
564     /***
565      * Constructs a new DOM4J Text or CDATA object from the provided Characters
566      * event.
567      * 
568      * @param characters
569      *            The Characters event from which to construct the new DOM4J
570      *            Text or CDATA object.
571      * 
572      * @return The Text or CDATA object constructed from the provided Characters
573      *         event.
574      */
575     public CharacterData createCharacterData(Characters characters) {
576         String data = characters.getData();
577 
578         if (characters.isCData()) {
579             return factory.createCDATA(data);
580         } else {
581             return factory.createText(data);
582         }
583     }
584 
585     /***
586      * Constructs a new DOM4J Comment from the provided StAX Comment event.
587      * 
588      * @param comment
589      *            The Comment event from which to construct the new DOM4J
590      *            Comment.
591      * 
592      * @return The Comment constructed from the provided Comment event.
593      */
594     public org.dom4j.Comment createComment(Comment comment) {
595         return factory.createComment(comment.getText());
596     }
597 
598     /***
599      * Constructs a new DOM4J Entity from the provided StAX EntityReference
600      * event.
601      * 
602      * @param entityRef
603      *            The EntityReference event from which to construct the new
604      *            DOM4J Entity.
605      * 
606      * @return The Entity constructed from the provided EntityReference event.
607      */
608     public org.dom4j.Entity createEntity(EntityReference entityRef) {
609         return factory.createEntity(entityRef.getName(), entityRef
610                 .getDeclaration().getReplacementText());
611     }
612 
613     /***
614      * Constructs a new DOM4J ProcessingInstruction from the provided StAX
615      * ProcessingInstruction event.
616      * 
617      * @param pi
618      *            The ProcessingInstruction event from which to construct the
619      *            new DOM4J ProcessingInstruction.
620      * 
621      * @return The ProcessingInstruction constructed from the provided
622      *         ProcessingInstruction event.
623      */
624     public org.dom4j.ProcessingInstruction createProcessingInstruction(
625             ProcessingInstruction pi) {
626         return factory
627                 .createProcessingInstruction(pi.getTarget(), pi.getData());
628     }
629 
630     /***
631      * Constructs a new DOM4J QName from the provided JAXP QName.
632      * 
633      * @param qname
634      *            The JAXP QName from which to create a DOM4J QName.
635      * 
636      * @return The newly constructed DOM4J QName.
637      */
638     public org.dom4j.QName createQName(QName qname) {
639         return factory.createQName(qname.getLocalPart(), qname.getPrefix(),
640                 qname.getNamespaceURI());
641     }
642 }
643 
644 /*
645  * Redistribution and use of this software and associated documentation
646  * ("Software"), with or without modification, are permitted provided that the
647  * following conditions are met:
648  * 
649  * 1. Redistributions of source code must retain copyright statements and
650  * notices. Redistributions must also contain a copy of this document.
651  * 
652  * 2. Redistributions in binary form must reproduce the above copyright notice,
653  * this list of conditions and the following disclaimer in the documentation
654  * and/or other materials provided with the distribution.
655  * 
656  * 3. The name "DOM4J" must not be used to endorse or promote products derived
657  * from this Software without prior written permission of MetaStuff, Ltd. For
658  * written permission, please contact dom4j-info@metastuff.com.
659  * 
660  * 4. Products derived from this Software may not be called "DOM4J" nor may
661  * "DOM4J" appear in their names without prior written permission of MetaStuff,
662  * Ltd. DOM4J is a registered trademark of MetaStuff, Ltd.
663  * 
664  * 5. Due credit should be given to the DOM4J Project - http://www.dom4j.org
665  * 
666  * THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS ``AS IS'' AND
667  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
668  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
669  * ARE DISCLAIMED. IN NO EVENT SHALL METASTUFF, LTD. OR ITS CONTRIBUTORS BE
670  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
671  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
672  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
673  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
674  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
675  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
676  * POSSIBILITY OF SUCH DAMAGE.
677  * 
678  * Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
679  */