1 files changed, 132 insertions, 37 deletions
diff --git a/examples/cxx/tree/streaming/parser.cxx b/examples/cxx/tree/streaming/parser.cxx
index a1f5bb8..41ad7af 100644
--- a/examples/cxx/tree/streaming/parser.cxx
+++ b/examples/cxx/tree/streaming/parser.cxx
@@ -1,5 +1,4 @@
-// file      : examples/cxx/tree/streaming/parser.cxx
-// copyright : not copyrighted - public domain
+#include <cassert>
 
 #include <xercesc/util/XMLUni.hpp>
 #include <xercesc/util/XMLString.hpp>
@@ -12,6 +11,9 @@
 #include <xercesc/dom/DOM.hpp>
 #include <xercesc/dom/impl/DOMTextImpl.hpp>
 
+#include <xercesc/validators/common/Grammar.hpp> // xercesc::Grammar
+#include <xercesc/framework/XMLGrammarPoolImpl.hpp>
+
 #include <xsd/cxx/auto-array.hxx>
 
 #include <xsd/cxx/xml/sax/std-input-source.hxx>
@@ -21,6 +23,7 @@
 #include <xsd/cxx/tree/error-handler.hxx>
 
 #include "parser.hxx"
+#include "grammar-input-stream.hxx"
 
 using namespace std;
 using namespace xercesc;
@@ -28,16 +31,22 @@ using namespace xercesc;
 namespace xml = xsd::cxx::xml;
 namespace tree = xsd::cxx::tree;
 
+typedef parser::document_ptr document_ptr;
+
 class parser_impl: public DefaultHandler
 {
 public:
-  parser_impl ();
+  parser_impl (const XMLByte* grammar, size_t grammar_size);
 
-  xml::dom::auto_ptr<DOMDocument>
+  void
   start (istream& is, const string& id, bool validate);
 
-  xml::dom::auto_ptr<DOMDocument>
-  next ();
+  document_ptr
+  peek ();
+
+  document_ptr
+  next (document_ptr doc = document_ptr (),
+        document_ptr outer_doc = document_ptr ());
 
   // SAX event handlers.
   //
@@ -61,6 +70,7 @@ private:
   // SAX parser.
   //
   bool clean_;
+  auto_ptr<XMLGrammarPool> grammar_pool_;
   auto_ptr<SAX2XMLReader> parser_;
   XMLPScanToken token_;
   tree::error_handler<char> error_handler_;
@@ -68,23 +78,40 @@ private:
   auto_ptr<xml::sax::std_input_source> isrc_;
 
   size_t depth_;
+  size_t whitespace_depth_; // Depth at which to ignore whitespaces.
+
+  bool peek_;
+  size_t next_depth_; // Depth at which next() should work.
 
   // DOM document being built.
   //
   DOMImplementation& dom_impl_;
-  xml::dom::auto_ptr<DOMDocument> doc_;
+  document_ptr doc_;
   DOMElement* cur_;
 };
 
 const XMLCh ls[] = {chLatin_L, chLatin_S, chNull};
 
 parser_impl::
-parser_impl ()
+parser_impl (const XMLByte* grammar, size_t grammar_size)
     : clean_ (true),
-      parser_ (XMLReaderFactory::createXMLReader ()),
       error_proxy_ (error_handler_),
       dom_impl_ (*DOMImplementationRegistry::getDOMImplementation (ls))
 {
+  MemoryManager* mm (XMLPlatformUtils::fgMemoryManager);
+
+  if (grammar != 0)
+  {
+    assert (grammar_size != 0);
+    grammar_pool_.reset (new XMLGrammarPoolImpl (mm));
+
+    grammar_input_stream is (grammar, grammar_size);
+    grammar_pool_->deserializeGrammars(&is);
+    grammar_pool_->lockPool ();
+  }
+
+  parser_.reset (XMLReaderFactory::createXMLReader (mm, grammar_pool_.get ()));
+
   parser_->setFeature (XMLUni::fgSAX2CoreNameSpaces, true);
   parser_->setFeature (XMLUni::fgSAX2CoreNameSpacePrefixes, true);
   parser_->setFeature (XMLUni::fgXercesValidationErrorAsFatal, true);
@@ -106,12 +133,13 @@ parser_impl ()
   parser_->setContentHandler (this);
 }
 
-xml::dom::auto_ptr<DOMDocument> parser_impl::
+void parser_impl::
 start (istream& is, const string& id, bool val)
 {
   // Reset our state.
   //
   depth_ = 0;
+  peek_ = false;
   doc_.reset ();
   error_handler_.reset ();
 
@@ -125,59 +153,116 @@ start (istream& is, const string& id, bool val)
   parser_->setFeature (XMLUni::fgSAX2CoreValidation, val);
   parser_->setFeature (XMLUni::fgXercesSchema, val);
 
-  // Start parsing. The first document that we return is a "carcase"
-  // of the complete document. That is, the root element with all the
-  // attributes but without any content.
-  //
-  bool r (parser_->parseFirst (*isrc_, token_));
+  if (val && grammar_pool_.get () != 0)
+  {
+    // Use the loaded grammar during parsing.
+    //
+    parser_->setFeature (XMLUni::fgXercesUseCachedGrammarInParse, true);
+
+    // Disable loading schemas via other means (e.g., schemaLocation).
+    //
+    parser_->setFeature (XMLUni::fgXercesLoadSchema, false);
+  }
+
+  parser_->parseFirst (*isrc_, token_);
   error_handler_.throw_if_failed<tree::parsing<char> > ();
+}
+
+document_ptr parser_impl::
+peek ()
+{
+  bool r (true);
+
+  size_t d (depth_);
+  whitespace_depth_ = d;
 
-  while (r && depth_ == 0)
+  peek_ = true;
+
+  // Parse (skip whitespace content) until the depth increases or we get
+  // a document. The latter test covers <element/> cases where both start
+  // and end events will trigger and therefore leave the depth unchanged.
+  //
+  while (r && depth_ == d && doc_.get () == 0)
   {
     r = parser_->parseNext (token_);
     error_handler_.throw_if_failed<tree::parsing<char> > ();
   }
 
   if (!r)
-    return xml::dom::auto_ptr<DOMDocument> (0);
+    return document_ptr (0);
 
   return doc_;
 }
 
-xml::dom::auto_ptr<DOMDocument> parser_impl::
-next ()
+document_ptr parser_impl::
+next (document_ptr doc, document_ptr outer_doc)
 {
-  // We should be at depth 1. If not, then we are done parsing.
+  assert (peek_ == (doc.get () != 0));
+
+  // Install doc/outer_doc as the document we are parsing.
   //
-  if (depth_ != 1)
-    return xml::dom::auto_ptr<DOMDocument> (0);
+  if (doc.get () != 0)
+  {
+    if (outer_doc.get () != 0)
+    {
+      // Copy doc to outer_doc.
+      //
+      doc_ = outer_doc;
+      cur_ = static_cast<DOMElement*> (
+        doc_->importNode (doc->getDocumentElement (), true));
+      doc_->getDocumentElement ()->appendChild (cur_);
+    }
+    else
+    {
+      doc_ = doc;
+      cur_ = doc_->getDocumentElement ();
+    }
+
+    // This handles the <element/> case where we get both start and
+    // end events in peek(). In this case the element is fully parsed
+    // and next() has nothing to do.
+    //
+    if (depth_ != next_depth_)
+    {
+      peek_ = false;
+      return doc_;
+    }
+  }
 
   bool r (true);
 
+  // If we peeked, then we have already seen the start tag and our
+  // return depth is one above the current depth.
+  //
+  size_t d (peek_ ? depth_ - 1 : depth_);
+  whitespace_depth_ = d;
+
+  peek_ = false;
+
   // Keep calling parseNext() until we either move to a greater depth or
   // get a document. This way we skip the text (presumably whitespaces)
-  // that may be preceding the next chunk.
+  // that may be preceding this chunk.
   //
-  while (r && depth_ == 1 && doc_.get () == 0)
+  while (r && depth_ == d && doc_.get () == 0)
   {
     parser_->parseNext (token_);
     error_handler_.throw_if_failed<tree::parsing<char> > ();
   }
 
   if (!r)
-    return xml::dom::auto_ptr<DOMDocument> (0);
+    return document_ptr (0);
 
-  // If we are not at depth 1, keep calling parseNext() until we get
-  // there.
+  // If we are not at our start depth, keep calling parseNext() until we
+  // get there again.
   //
-  while (r && depth_ != 1)
+  while (r && depth_ != d)
   {
     r = parser_->parseNext (token_);
     error_handler_.throw_if_failed<tree::parsing<char> > ();
   }
 
   if (!r)
-    return xml::dom::auto_ptr<DOMDocument> (0);
+    return document_ptr (0);
 
   return doc_;
 }
@@ -221,6 +306,9 @@ startElement (const XMLCh* const uri,
   }
 
   depth_++;
+
+  if (peek_)
+    next_depth_ = depth_;
 }
 
 void parser_impl::
@@ -239,9 +327,10 @@ characters (const XMLCh* const s, const XMLSize_t length)
 {
   const XMLCh empty[] = {chNull};
 
-  // Ignore text content (presumably whitespaces) in the root element.
+  // Ignore text content (presumably whitespaces) while looking for
+  // the next element.
   //
-  if (depth_ > 1)
+  if (depth_ > whitespace_depth_)
   {
     DOMText* t = doc_->createTextNode (empty);
     static_cast<DOMTextImpl*> (t)->appendData (s, length);
@@ -259,19 +348,25 @@ parser::
 }
 
 parser::
-parser ()
-    : impl_ (new parser_impl)
+parser (const XMLByte* grammar, size_t grammar_size)
+    : impl_ (new parser_impl (grammar, grammar_size))
 {
 }
 
-xml::dom::auto_ptr<DOMDocument> parser::
+void parser::
 start (istream& is, const string& id, bool val)
 {
   return impl_->start (is, id, val);
 }
 
-xml::dom::auto_ptr<DOMDocument> parser::
-next ()
+document_ptr parser::
+peek ()
+{
+  return impl_->peek ();
+}
+
+document_ptr parser::
+next (document_ptr doc, document_ptr outer_doc)
 {
-  return impl_->next ();
+  return impl_->next (doc, outer_doc);
 }