From afebd79d44b75aed3b38e867c65330ba80ddc0ee Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Tue, 13 Oct 2009 11:29:26 +0200 Subject: Extended the streaming example It now shows how to perform stream-oriented, partially in-memory XML processing using the C++/Tree mapping. --- examples/cxx/tree/streaming/parser.cxx | 288 +++++++++++++++++++++++++++++++++ 1 file changed, 288 insertions(+) create mode 100644 examples/cxx/tree/streaming/parser.cxx (limited to 'examples/cxx/tree/streaming/parser.cxx') diff --git a/examples/cxx/tree/streaming/parser.cxx b/examples/cxx/tree/streaming/parser.cxx new file mode 100644 index 0000000..1a0fa73 --- /dev/null +++ b/examples/cxx/tree/streaming/parser.cxx @@ -0,0 +1,288 @@ +// file : examples/cxx/tree/streaming/parser.cxx +// author : Boris Kolpackov +// copyright : not copyrighted - public domain + +#include +#include + +#include +#include +#include +#include + +#include + +#if _XERCES_VERSION >= 30000 +# include +#endif + +#include + +#include +#include + +#include +#include + +#include "parser.hxx" + +using namespace std; +using namespace xercesc; + +namespace xml = xsd::cxx::xml; +namespace tree = xsd::cxx::tree; + +class parser_impl: DefaultHandler +{ +public: + parser_impl (); + + xml::dom::auto_ptr + start (istream& is, const string& id, bool validate); + + xml::dom::auto_ptr + next (); + + // SAX event handlers. + // +private: + virtual void + startElement (const XMLCh* const uri, + const XMLCh* const lname, + const XMLCh* const qname, + const Attributes& attributes); + + virtual void + endElement (const XMLCh* const uri, + const XMLCh* const lname, + const XMLCh* const qname); + + virtual void + characters (const XMLCh* const s, +#if _XERCES_VERSION >= 30000 + const XMLSize_t length +#else + const unsigned int length +#endif + ); + +private: + // SAX parser. + // + bool clean_; + auto_ptr parser_; + XMLPScanToken token_; + tree::error_handler error_handler_; + xml::sax::bits::error_handler_proxy error_proxy_; + auto_ptr isrc_; + + size_t depth_; + + // DOM document being built. + // + DOMImplementation& dom_impl_; + xml::dom::auto_ptr doc_; + DOMElement* cur_; +}; + +const XMLCh ls[] = {chLatin_L, chLatin_S, chNull}; + +parser_impl:: +parser_impl () + : clean_ (true), + parser_ (XMLReaderFactory::createXMLReader ()), + error_proxy_ (error_handler_), + dom_impl_ (*DOMImplementationRegistry::getDOMImplementation (ls)) +{ + parser_->setFeature (XMLUni::fgSAX2CoreNameSpaces, true); + parser_->setFeature (XMLUni::fgSAX2CoreNameSpacePrefixes, true); + parser_->setFeature (XMLUni::fgXercesValidationErrorAsFatal, true); + parser_->setFeature (XMLUni::fgXercesSchemaFullChecking, false); + + parser_->setErrorHandler (&error_proxy_); + parser_->setContentHandler (this); +} + +xml::dom::auto_ptr parser_impl:: +start (istream& is, const string& id, bool val) +{ + // Reset our state. + // + depth_ = 0; + doc_.reset (); + error_handler_.reset (); + + if (!clean_) + parser_->parseReset (token_); + else + clean_ = false; + + isrc_.reset (new xml::sax::std_input_source (is, id)); + + parser_->setFeature (XMLUni::fgSAX2CoreValidation, val); + parser_->setFeature (XMLUni::fgXercesSchema, val); + + // Start parsing. The first document that we return is a "carcase" + // of the complete document. That is, the root element with all the + // attributes but without any content. + // + bool r (parser_->parseFirst (*isrc_, token_)); + error_handler_.throw_if_failed > (); + + while (r && depth_ == 0) + { + r = parser_->parseNext (token_); + error_handler_.throw_if_failed > (); + } + + if (!r) + return xml::dom::auto_ptr (0); + + return doc_; +} + +xml::dom::auto_ptr parser_impl:: +next () +{ + // We should be at depth 1. If not, then we are done parsing. + // + if (depth_ != 1) + return xml::dom::auto_ptr (0); + + bool r (true); + + // Keep calling parseNext() until we either move to a greater depth or + // get a document. This way we skip the text (presumably whitespaces) + // that may be preceding the next chunk. + // + while (r && depth_ == 1 && doc_.get () == 0) + { + parser_->parseNext (token_); + error_handler_.throw_if_failed > (); + } + + if (!r) + return xml::dom::auto_ptr (0); + + // If we are not at depth 1, keep calling parseNext() until we get + // there. + // + while (r && depth_ != 1) + { + r = parser_->parseNext (token_); + error_handler_.throw_if_failed > (); + } + + if (!r) + return xml::dom::auto_ptr (0); + + return doc_; +} + +// DOM builder. +// + +void parser_impl:: +startElement (const XMLCh* const uri, + const XMLCh* const /*lname*/, + const XMLCh* const qname, + const Attributes& attr) +{ + if (doc_.get () == 0) + { + doc_.reset (dom_impl_.createDocument (uri, qname, 0)); + cur_ = doc_->getDocumentElement (); + } + else + { + DOMElement* e = doc_->createElementNS (uri, qname); + cur_->appendChild (e); + cur_ = e; + } + + // Set attributes. + // +#if _XERCES_VERSION >= 30000 + for (XMLSize_t i (0), end (attr.getLength()); i < end; ++i) +#else + for (unsigned int i (0), end (attr.getLength()); i < end; ++i) +#endif + { + cur_->setAttributeNS (attr.getURI (i), + attr.getQName (i), + attr.getValue (i)); + } + + depth_++; +} + +void parser_impl:: +endElement (const XMLCh* const /*uri*/, + const XMLCh* const /*lname*/, + const XMLCh* const /*qname*/) +{ + // We have an element parent only on depth 2 or greater. + // + if (--depth_ > 1) + cur_ = static_cast (cur_->getParentNode ()); +} + +#if _XERCES_VERSION >= 30000 +void parser_impl:: +characters (const XMLCh* const s, const XMLSize_t length) +{ + const XMLCh empty[] = {chNull}; + + // Ignore text content (presumably whitespaces) in the root element. + // + if (depth_ > 1) + { + DOMText* t = doc_->createTextNode (empty); + static_cast (t)->appendData (s, length); + cur_->appendChild (t); + } +} +#else +void parser_impl:: +characters (const XMLCh* const s, const unsigned int length) +{ + // Ignore text content (presumably whitespaces) in the root element. + // + if (depth_ > 1) + { + // For Xerces-C++ 2-series we have to make copy. + // + xsd::cxx::auto_array tmp (new XMLCh[length + 1]); + XMLString::copyNString (tmp.get (), s, length); + cur_->appendChild (doc_->createTextNode (tmp.get ())); + } +} +#endif + + +// +// parser +// + +parser:: +~parser () +{ +} + +parser:: +parser () + : impl_ (new parser_impl) +{ +} + +xml::dom::auto_ptr parser:: +start (istream& is, const string& id, bool val) +{ + return impl_->start (is, id, val); +} + +xml::dom::auto_ptr parser:: +next () +{ + return impl_->next (); +} -- cgit v1.1