summaryrefslogtreecommitdiff
path: root/examples/cxx/tree/streaming/parser.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'examples/cxx/tree/streaming/parser.cxx')
-rw-r--r--examples/cxx/tree/streaming/parser.cxx169
1 files changed, 132 insertions, 37 deletions
diff --git a/examples/cxx/tree/streaming/parser.cxx b/examples/cxx/tree/streaming/parser.cxx
index a1f5bb8..41ad7af 100644
--- a/examples/cxx/tree/streaming/parser.cxx
+++ b/examples/cxx/tree/streaming/parser.cxx
@@ -1,5 +1,4 @@
-// file : examples/cxx/tree/streaming/parser.cxx
-// copyright : not copyrighted - public domain
+#include <cassert>
#include <xercesc/util/XMLUni.hpp>
#include <xercesc/util/XMLString.hpp>
@@ -12,6 +11,9 @@
#include <xercesc/dom/DOM.hpp>
#include <xercesc/dom/impl/DOMTextImpl.hpp>
+#include <xercesc/validators/common/Grammar.hpp> // xercesc::Grammar
+#include <xercesc/framework/XMLGrammarPoolImpl.hpp>
+
#include <xsd/cxx/auto-array.hxx>
#include <xsd/cxx/xml/sax/std-input-source.hxx>
@@ -21,6 +23,7 @@
#include <xsd/cxx/tree/error-handler.hxx>
#include "parser.hxx"
+#include "grammar-input-stream.hxx"
using namespace std;
using namespace xercesc;
@@ -28,16 +31,22 @@ using namespace xercesc;
namespace xml = xsd::cxx::xml;
namespace tree = xsd::cxx::tree;
+typedef parser::document_ptr document_ptr;
+
class parser_impl: public DefaultHandler
{
public:
- parser_impl ();
+ parser_impl (const XMLByte* grammar, size_t grammar_size);
- xml::dom::auto_ptr<DOMDocument>
+ void
start (istream& is, const string& id, bool validate);
- xml::dom::auto_ptr<DOMDocument>
- next ();
+ document_ptr
+ peek ();
+
+ document_ptr
+ next (document_ptr doc = document_ptr (),
+ document_ptr outer_doc = document_ptr ());
// SAX event handlers.
//
@@ -61,6 +70,7 @@ private:
// SAX parser.
//
bool clean_;
+ auto_ptr<XMLGrammarPool> grammar_pool_;
auto_ptr<SAX2XMLReader> parser_;
XMLPScanToken token_;
tree::error_handler<char> error_handler_;
@@ -68,23 +78,40 @@ private:
auto_ptr<xml::sax::std_input_source> isrc_;
size_t depth_;
+ size_t whitespace_depth_; // Depth at which to ignore whitespaces.
+
+ bool peek_;
+ size_t next_depth_; // Depth at which next() should work.
// DOM document being built.
//
DOMImplementation& dom_impl_;
- xml::dom::auto_ptr<DOMDocument> doc_;
+ document_ptr doc_;
DOMElement* cur_;
};
const XMLCh ls[] = {chLatin_L, chLatin_S, chNull};
parser_impl::
-parser_impl ()
+parser_impl (const XMLByte* grammar, size_t grammar_size)
: clean_ (true),
- parser_ (XMLReaderFactory::createXMLReader ()),
error_proxy_ (error_handler_),
dom_impl_ (*DOMImplementationRegistry::getDOMImplementation (ls))
{
+ MemoryManager* mm (XMLPlatformUtils::fgMemoryManager);
+
+ if (grammar != 0)
+ {
+ assert (grammar_size != 0);
+ grammar_pool_.reset (new XMLGrammarPoolImpl (mm));
+
+ grammar_input_stream is (grammar, grammar_size);
+ grammar_pool_->deserializeGrammars(&is);
+ grammar_pool_->lockPool ();
+ }
+
+ parser_.reset (XMLReaderFactory::createXMLReader (mm, grammar_pool_.get ()));
+
parser_->setFeature (XMLUni::fgSAX2CoreNameSpaces, true);
parser_->setFeature (XMLUni::fgSAX2CoreNameSpacePrefixes, true);
parser_->setFeature (XMLUni::fgXercesValidationErrorAsFatal, true);
@@ -106,12 +133,13 @@ parser_impl ()
parser_->setContentHandler (this);
}
-xml::dom::auto_ptr<DOMDocument> parser_impl::
+void parser_impl::
start (istream& is, const string& id, bool val)
{
// Reset our state.
//
depth_ = 0;
+ peek_ = false;
doc_.reset ();
error_handler_.reset ();
@@ -125,59 +153,116 @@ start (istream& is, const string& id, bool val)
parser_->setFeature (XMLUni::fgSAX2CoreValidation, val);
parser_->setFeature (XMLUni::fgXercesSchema, val);
- // Start parsing. The first document that we return is a "carcase"
- // of the complete document. That is, the root element with all the
- // attributes but without any content.
- //
- bool r (parser_->parseFirst (*isrc_, token_));
+ if (val && grammar_pool_.get () != 0)
+ {
+ // Use the loaded grammar during parsing.
+ //
+ parser_->setFeature (XMLUni::fgXercesUseCachedGrammarInParse, true);
+
+ // Disable loading schemas via other means (e.g., schemaLocation).
+ //
+ parser_->setFeature (XMLUni::fgXercesLoadSchema, false);
+ }
+
+ parser_->parseFirst (*isrc_, token_);
error_handler_.throw_if_failed<tree::parsing<char> > ();
+}
+
+document_ptr parser_impl::
+peek ()
+{
+ bool r (true);
+
+ size_t d (depth_);
+ whitespace_depth_ = d;
- while (r && depth_ == 0)
+ peek_ = true;
+
+ // Parse (skip whitespace content) until the depth increases or we get
+ // a document. The latter test covers <element/> cases where both start
+ // and end events will trigger and therefore leave the depth unchanged.
+ //
+ while (r && depth_ == d && doc_.get () == 0)
{
r = parser_->parseNext (token_);
error_handler_.throw_if_failed<tree::parsing<char> > ();
}
if (!r)
- return xml::dom::auto_ptr<DOMDocument> (0);
+ return document_ptr (0);
return doc_;
}
-xml::dom::auto_ptr<DOMDocument> parser_impl::
-next ()
+document_ptr parser_impl::
+next (document_ptr doc, document_ptr outer_doc)
{
- // We should be at depth 1. If not, then we are done parsing.
+ assert (peek_ == (doc.get () != 0));
+
+ // Install doc/outer_doc as the document we are parsing.
//
- if (depth_ != 1)
- return xml::dom::auto_ptr<DOMDocument> (0);
+ if (doc.get () != 0)
+ {
+ if (outer_doc.get () != 0)
+ {
+ // Copy doc to outer_doc.
+ //
+ doc_ = outer_doc;
+ cur_ = static_cast<DOMElement*> (
+ doc_->importNode (doc->getDocumentElement (), true));
+ doc_->getDocumentElement ()->appendChild (cur_);
+ }
+ else
+ {
+ doc_ = doc;
+ cur_ = doc_->getDocumentElement ();
+ }
+
+ // This handles the <element/> case where we get both start and
+ // end events in peek(). In this case the element is fully parsed
+ // and next() has nothing to do.
+ //
+ if (depth_ != next_depth_)
+ {
+ peek_ = false;
+ return doc_;
+ }
+ }
bool r (true);
+ // If we peeked, then we have already seen the start tag and our
+ // return depth is one above the current depth.
+ //
+ size_t d (peek_ ? depth_ - 1 : depth_);
+ whitespace_depth_ = d;
+
+ peek_ = false;
+
// Keep calling parseNext() until we either move to a greater depth or
// get a document. This way we skip the text (presumably whitespaces)
- // that may be preceding the next chunk.
+ // that may be preceding this chunk.
//
- while (r && depth_ == 1 && doc_.get () == 0)
+ while (r && depth_ == d && doc_.get () == 0)
{
parser_->parseNext (token_);
error_handler_.throw_if_failed<tree::parsing<char> > ();
}
if (!r)
- return xml::dom::auto_ptr<DOMDocument> (0);
+ return document_ptr (0);
- // If we are not at depth 1, keep calling parseNext() until we get
- // there.
+ // If we are not at our start depth, keep calling parseNext() until we
+ // get there again.
//
- while (r && depth_ != 1)
+ while (r && depth_ != d)
{
r = parser_->parseNext (token_);
error_handler_.throw_if_failed<tree::parsing<char> > ();
}
if (!r)
- return xml::dom::auto_ptr<DOMDocument> (0);
+ return document_ptr (0);
return doc_;
}
@@ -221,6 +306,9 @@ startElement (const XMLCh* const uri,
}
depth_++;
+
+ if (peek_)
+ next_depth_ = depth_;
}
void parser_impl::
@@ -239,9 +327,10 @@ characters (const XMLCh* const s, const XMLSize_t length)
{
const XMLCh empty[] = {chNull};
- // Ignore text content (presumably whitespaces) in the root element.
+ // Ignore text content (presumably whitespaces) while looking for
+ // the next element.
//
- if (depth_ > 1)
+ if (depth_ > whitespace_depth_)
{
DOMText* t = doc_->createTextNode (empty);
static_cast<DOMTextImpl*> (t)->appendData (s, length);
@@ -259,19 +348,25 @@ parser::
}
parser::
-parser ()
- : impl_ (new parser_impl)
+parser (const XMLByte* grammar, size_t grammar_size)
+ : impl_ (new parser_impl (grammar, grammar_size))
{
}
-xml::dom::auto_ptr<DOMDocument> parser::
+void parser::
start (istream& is, const string& id, bool val)
{
return impl_->start (is, id, val);
}
-xml::dom::auto_ptr<DOMDocument> parser::
-next ()
+document_ptr parser::
+peek ()
+{
+ return impl_->peek ();
+}
+
+document_ptr parser::
+next (document_ptr doc, document_ptr outer_doc)
{
- return impl_->next ();
+ return impl_->next (doc, outer_doc);
}