From 5e527213a2430bb3018e5eebd909aef294edf9b5 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Fri, 18 Dec 2020 18:48:46 +0300 Subject: Switch to build2 --- xsd-examples/cxx/tree/streaming/.gitignore | 1 + xsd-examples/cxx/tree/streaming/README | 51 ++ xsd-examples/cxx/tree/streaming/buildfile | 26 + xsd-examples/cxx/tree/streaming/driver.cxx | 139 +++++ .../cxx/tree/streaming/grammar-input-stream.cxx | 96 ++++ .../cxx/tree/streaming/grammar-input-stream.hxx | 41 ++ xsd-examples/cxx/tree/streaming/parser.cxx | 371 ++++++++++++ xsd-examples/cxx/tree/streaming/parser.hxx | 67 +++ xsd-examples/cxx/tree/streaming/position.xml | 29 + xsd-examples/cxx/tree/streaming/position.xsd | 37 ++ xsd-examples/cxx/tree/streaming/serializer.cxx | 636 +++++++++++++++++++++ xsd-examples/cxx/tree/streaming/serializer.hxx | 209 +++++++ xsd-examples/cxx/tree/streaming/testscript | 10 + 13 files changed, 1713 insertions(+) create mode 100644 xsd-examples/cxx/tree/streaming/.gitignore create mode 100644 xsd-examples/cxx/tree/streaming/README create mode 100644 xsd-examples/cxx/tree/streaming/buildfile create mode 100644 xsd-examples/cxx/tree/streaming/driver.cxx create mode 100644 xsd-examples/cxx/tree/streaming/grammar-input-stream.cxx create mode 100644 xsd-examples/cxx/tree/streaming/grammar-input-stream.hxx create mode 100644 xsd-examples/cxx/tree/streaming/parser.cxx create mode 100644 xsd-examples/cxx/tree/streaming/parser.hxx create mode 100644 xsd-examples/cxx/tree/streaming/position.xml create mode 100644 xsd-examples/cxx/tree/streaming/position.xsd create mode 100644 xsd-examples/cxx/tree/streaming/serializer.cxx create mode 100644 xsd-examples/cxx/tree/streaming/serializer.hxx create mode 100644 xsd-examples/cxx/tree/streaming/testscript (limited to 'xsd-examples/cxx/tree/streaming') diff --git a/xsd-examples/cxx/tree/streaming/.gitignore b/xsd-examples/cxx/tree/streaming/.gitignore new file mode 100644 index 0000000..db4a6e9 --- /dev/null +++ 
b/xsd-examples/cxx/tree/streaming/.gitignore @@ -0,0 +1 @@ +position.?xx diff --git a/xsd-examples/cxx/tree/streaming/README b/xsd-examples/cxx/tree/streaming/README new file mode 100644 index 0000000..5a467e0 --- /dev/null +++ b/xsd-examples/cxx/tree/streaming/README @@ -0,0 +1,51 @@ +This example shows how to perform stream-oriented, partially in-memory +XML processing using the C++/Tree mapping. With the partially in-memory +parsing and serialization only a part of the object model is in memory at +any given time. With this approach we can process parts of the document +as they become available as well as handle documents that are too large +to fit into memory. + +The example consists of the following files: + +position.xsd + XML Schema which describes a simple object position vocabulary. The + position is represented as a potentially large series of latitude and + longitude measurements. + +position.xml + Sample object position document. + +position.hxx +position.cxx + C++ types that represent the position vocabulary as well as parsing + and serialization functions. These are generated by XSD from + position.xsd. + +parser.hxx +parser.cxx + Stream-oriented DOM parser implementation that is built on top of the + Xerces-C++ SAX2 parser in the progressive parsing mode. This parser + allows us to parse an XML document as a series of DOM fragments. + +serializer.hxx +serializer.cxx + Stream-oriented DOM serializer implementation that allows us to + serialize an XML Document as a series of object model fragments. + +grammar-input-stream.hxx +grammar-input-stream.cxx + Input stream implementation with the special-purpose schema grammar + decompression algorithm. It is used internally by the streaming parser. + +driver.cxx + Driver for the example. It parses the input file into a series of DOM + fragments which are then parsed into the object model fragments. The + driver prints the information from the document as it becomes available. 
+ It also serializes the object model fragments into a new XML document + (out.xml). + +To run the example simply execute: + +$ ./driver position.xml + +The serialization results are written to the out.xml file. diff --git a/xsd-examples/cxx/tree/streaming/buildfile b/xsd-examples/cxx/tree/streaming/buildfile new file mode 100644 index 0000000..8c34b17 --- /dev/null +++ b/xsd-examples/cxx/tree/streaming/buildfile @@ -0,0 +1,26 @@ +# file : cxx/tree/streaming/buildfile +# license : not copyrighted - public domain + +import libs = libxsd%lib{xsd} +import libs += libxerces-c%lib{xerces-c} + +./: exe{driver} xml{position} doc{README} + +exe{driver}: {hxx cxx}{* -position} {hxx ixx cxx}{position} $libs testscript + +<{hxx ixx cxx}{position}>: xsd{position} $xsd +{{ + diag xsd ($<[0]) # @@ TMP + + $xsd cxx-tree --std c++11 \ + --generate-inline \ + --generate-serialization \ + --output-dir $out_base \ + $path($<[0]) +}} + +cxx.poptions =+ "-I$out_base" "-I$src_base" + +# Define XSD_CXX11 since we include libxsd headers directly. +# +cxx.poptions += -DXSD_CXX11 diff --git a/xsd-examples/cxx/tree/streaming/driver.cxx b/xsd-examples/cxx/tree/streaming/driver.cxx new file mode 100644 index 0000000..8a70f58 --- /dev/null +++ b/xsd-examples/cxx/tree/streaming/driver.cxx @@ -0,0 +1,139 @@ +// file : cxx/tree/streaming/driver.cxx +// copyright : not copyrighted - public domain + +#include +#include + +#include + +#include // xml::string + +#include "parser.hxx" +#include "serializer.hxx" +#include "position.hxx" + +using namespace std; +using namespace xercesc; + +int +main (int argc, char* argv[]) +{ + if (argc != 2) + { + cerr << "usage: " << argv[0] << " position.xml" << endl; + return 1; + } + + int r (0); + + // We need to initialize the Xerces-C++ runtime because we are doing + // the XML-to-DOM parsing ourselves. 
+ // + xercesc::XMLPlatformUtils::Initialize (); + + try + { + using namespace op; + namespace xml = xsd::cxx::xml; + + // Parse and serialize at the same time, in the streaming mode. + // + + ifstream ifs; + ifs.exceptions (ifstream::badbit | ifstream::failbit); + ifs.open (argv[1]); + + ofstream ofs; + ofs.exceptions (ios_base::badbit | ios_base::failbit); + ofs.open ("out.xml"); + + xml_schema::namespace_infomap ns_map; + ns_map["op"].name = "http://www.codesynthesis.com/op"; + ns_map["op"].schema = "position.xsd"; + + parser p; + serializer s; + + p.start (ifs, argv[1], true); + s.start (ofs); + + typedef xml_schema::dom::unique_ptr document_ptr; + + // Peek at the root element. This way we only get the "carcase" + // of the document, that is, the root element with its name, all + // the attributes, and namespace declarations but without any of + // the nested elements. + // + document_ptr docr (p.peek ()); + bool parsed (false); + + // Parse first-level elements. + // + for (document_ptr doc1 (p.peek ()); doc1.get () != 0; doc1 = p.peek ()) + { + // Check whether it is an element that we should stream (position) or + // just add to the root (header). + // + string n1 (xml::transcode ( + doc1->getDocumentElement ()->getLocalName ())); + + // If we see the first streaming element, then parse the root carcase. + // + if (!parsed && n1 == "position") + { + object o (*docr->getDocumentElement ()); + + cerr << "id: " << o.id () << endl + << "name: " << o.header ().name () << endl + << "type: " << o.header ().type () << endl; + + // Start serializing the document by writing out the root carcase. + // Note that we leave it open so that we can serialize more elements. + // + s.next_open (ns_map["op"].name, "op:object", ns_map, o); + parsed = true; + } + + // Handle elements that need streaming. + // + if (n1 == "position") + { + // Position has no nested elements that we need to stream so we + // finish parsing it in one go. 
+ // + doc1 = p.next (move (doc1)); + position pos (*doc1->getDocumentElement ()); + + cerr << "lat: " << pos.lat () << " lon: " << pos.lon () << endl; + + // Serialize it (append) to the root element. + // + s.next ("position", pos); + } + else + { + // Element that doesn't require streaming (header in our case). Add + // to the root element and finish parsing. + // + docr = p.next (move (doc1), move (docr)); + } + } + + // Close the root element in serializer. + // + s.next_close ("op:object"); + } + catch (const xml_schema::exception& e) + { + cerr << e << endl; + r = 1; + } + catch (const ios_base::failure&) + { + cerr << "io failure" << endl; + r = 1; + } + + xercesc::XMLPlatformUtils::Terminate (); + return r; +} diff --git a/xsd-examples/cxx/tree/streaming/grammar-input-stream.cxx b/xsd-examples/cxx/tree/streaming/grammar-input-stream.cxx new file mode 100644 index 0000000..6f17f33 --- /dev/null +++ b/xsd-examples/cxx/tree/streaming/grammar-input-stream.cxx @@ -0,0 +1,96 @@ +// file : cxx/tree/streaming/grammar-input-stream.cxx +// author : Boris Kolpackov +// copyright : not copyrighted - public domain + +#include +#include "grammar-input-stream.hxx" + +grammar_input_stream:: +grammar_input_stream (const XMLByte* data, std::size_t size) + : data_ (data), + size_ (size), + pos_ (0), + vpos_ (0), + cseq_ (0), + add_zero_ (false) +{ +} + +XMLFilePos grammar_input_stream:: +curPos () const +{ + return static_cast (vpos_); +} + +XMLSize_t grammar_input_stream:: +readBytes (XMLByte* const buf, const XMLSize_t size) +{ + std::size_t i (0); + + // Add a zero from the alternating sequence if it didn't + // fit on the previous read. + // + if (add_zero_) + { + buf[i++] = 0; + add_zero_ = false; + } + + // If have an unfinished sequential sequence, output it now. 
+ // + if (cseq_ != 0 && !alt_) + { + for (; cseq_ != 0 && i < size; --cseq_) + buf[i++] = 0; + } + + for (; i < size && pos_ < size_;) + { + XMLByte b = buf[i++] = data_[pos_++]; + + // See if we are in a compression sequence. + // + if (cseq_ != 0) + { + if (i < size) + buf[i++] = 0; + else + add_zero_ = true; // Add it on the next read. + + cseq_--; + continue; + } + + // If we are not in a compression sequence and this byte is + // not zero then we are done. + // + if (b != 0) + continue; + + // We have a zero. + // + assert (pos_ < size_); // There has to be another byte. + unsigned char v (static_cast (data_[pos_++])); + alt_ = (v & 128) != 0; + cseq_ = v & 127; + + // If it is a sequential sequence, output as many zeros as + // we can. + // + if (!alt_) + { + for (; cseq_ != 0 && i < size; --cseq_) + buf[i++] = 0; + } + } + + vpos_ += i; + + return static_cast (i); +} + +const XMLCh* grammar_input_stream:: +getContentType () const +{ + return 0; +} diff --git a/xsd-examples/cxx/tree/streaming/grammar-input-stream.hxx b/xsd-examples/cxx/tree/streaming/grammar-input-stream.hxx new file mode 100644 index 0000000..17e6913 --- /dev/null +++ b/xsd-examples/cxx/tree/streaming/grammar-input-stream.hxx @@ -0,0 +1,41 @@ +// file : cxx/tree/streaming/grammar-input-stream.hxx +// author : Boris Kolpackov +// copyright : not copyrighted - public domain + +#ifndef GRAMMAR_INPUT_STREAM_HXX +#define GRAMMAR_INPUT_STREAM_HXX + +#include +#include + +// Memory buffer input stream with the special-purpose schema +// grammar decompression. +// +class grammar_input_stream: public xercesc::BinInputStream +{ +public : + grammar_input_stream (const XMLByte* data, std::size_t size); + + virtual XMLFilePos + curPos () const; + + virtual XMLSize_t + readBytes (XMLByte* const buf, const XMLSize_t size); + + virtual const XMLCh* + getContentType () const; + +private : + const XMLByte* data_; + std::size_t size_; + std::size_t pos_; + std::size_t vpos_; + + // Compression data. 
+ // + size_t cseq_; // Number of bytes left in a compression sequence. + bool alt_; // Alternating or sequential sequence. + bool add_zero_; // Add a zero on the next read. +}; + +#endif // GRAMMAR_INPUT_STREAM_HXX diff --git a/xsd-examples/cxx/tree/streaming/parser.cxx b/xsd-examples/cxx/tree/streaming/parser.cxx new file mode 100644 index 0000000..064dc77 --- /dev/null +++ b/xsd-examples/cxx/tree/streaming/parser.cxx @@ -0,0 +1,371 @@ +#include +#include // std::move() + +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include // xercesc::Grammar +#include + +#include +#include + +#include +#include + +#include "parser.hxx" +#include "grammar-input-stream.hxx" + +using namespace std; +using namespace xercesc; + +namespace xml = xsd::cxx::xml; +namespace tree = xsd::cxx::tree; + +typedef parser::document_ptr document_ptr; + +class parser_impl: public DefaultHandler +{ +public: + parser_impl (const XMLByte* grammar, size_t grammar_size); + + void + start (istream& is, const string& id, bool validate); + + document_ptr + peek (); + + document_ptr + next (document_ptr doc = document_ptr (), + document_ptr outer_doc = document_ptr ()); + + // SAX event handlers. + // +private: + virtual void + startElement (const XMLCh* const uri, + const XMLCh* const lname, + const XMLCh* const qname, + const Attributes& attributes); + + virtual void + endElement (const XMLCh* const uri, + const XMLCh* const lname, + const XMLCh* const qname); + + virtual void + characters (const XMLCh* const s, + const XMLSize_t length); + +private: + // SAX parser. + // + bool clean_; + unique_ptr grammar_pool_; + unique_ptr parser_; + XMLPScanToken token_; + tree::error_handler error_handler_; + xml::sax::bits::error_handler_proxy error_proxy_; + unique_ptr isrc_; + + size_t depth_; + size_t whitespace_depth_; // Depth at which to ignore whitespaces. + + bool peek_; + size_t next_depth_; // Depth at which next() should work. + + // DOM document being built. 
+ // + DOMImplementation& dom_impl_; + document_ptr doc_; + DOMElement* cur_; +}; + +const XMLCh ls[] = {chLatin_L, chLatin_S, chNull}; + +parser_impl:: +parser_impl (const XMLByte* grammar, size_t grammar_size) + : clean_ (true), + error_proxy_ (error_handler_), + dom_impl_ (*DOMImplementationRegistry::getDOMImplementation (ls)) +{ + MemoryManager* mm (XMLPlatformUtils::fgMemoryManager); + + if (grammar != 0) + { + assert (grammar_size != 0); + grammar_pool_.reset (new XMLGrammarPoolImpl (mm)); + + grammar_input_stream is (grammar, grammar_size); + grammar_pool_->deserializeGrammars(&is); + grammar_pool_->lockPool (); + } + + parser_.reset (XMLReaderFactory::createXMLReader (mm, grammar_pool_.get ())); + + parser_->setFeature (XMLUni::fgSAX2CoreNameSpaces, true); + parser_->setFeature (XMLUni::fgSAX2CoreNameSpacePrefixes, true); + parser_->setFeature (XMLUni::fgXercesValidationErrorAsFatal, true); + parser_->setFeature (XMLUni::fgXercesSchemaFullChecking, false); + + // Xerces-C++ 3.1.0 is the first version with working multi import + // support. It also allows us to disable buffering in the parser + // so that the data is parsed and returned as soon as it is + // available. + // +#if _XERCES_VERSION >= 30100 + parser_->setFeature (XMLUni::fgXercesHandleMultipleImports, true); + + XMLSize_t lwm = 0; + parser_->setProperty (XMLUni::fgXercesLowWaterMark, &lwm); +#endif + + parser_->setErrorHandler (&error_proxy_); + parser_->setContentHandler (this); +} + +void parser_impl:: +start (istream& is, const string& id, bool val) +{ + // Reset our state. + // + depth_ = 0; + peek_ = false; + doc_.reset (); + error_handler_.reset (); + + if (!clean_) + parser_->parseReset (token_); + else + clean_ = false; + + isrc_.reset (new xml::sax::std_input_source (is, id)); + + parser_->setFeature (XMLUni::fgSAX2CoreValidation, val); + parser_->setFeature (XMLUni::fgXercesSchema, val); + + if (val && grammar_pool_.get () != 0) + { + // Use the loaded grammar during parsing. 
+ // + parser_->setFeature (XMLUni::fgXercesUseCachedGrammarInParse, true); + + // Disable loading schemas via other means (e.g., schemaLocation). + // + parser_->setFeature (XMLUni::fgXercesLoadSchema, false); + } + + parser_->parseFirst (*isrc_, token_); + error_handler_.throw_if_failed > (); +} + +document_ptr parser_impl:: +peek () +{ + bool r (true); + + size_t d (depth_); + whitespace_depth_ = d; + + peek_ = true; + + // Parse (skip whitespace content) until the depth increases or we get + // a document. The latter test covers cases where both start + // and end events will trigger and therefore leave the depth unchanged. + // + while (r && depth_ == d && doc_.get () == 0) + { + r = parser_->parseNext (token_); + error_handler_.throw_if_failed > (); + } + + if (!r) + return document_ptr (); + + return move (doc_); +} + +document_ptr parser_impl:: +next (document_ptr doc, document_ptr outer_doc) +{ + assert (peek_ == (doc.get () != 0)); + + // Install doc/outer_doc as the document we are parsing. + // + if (doc.get () != 0) + { + if (outer_doc.get () != 0) + { + // Move doc to outer_doc. + // + doc_ = move (outer_doc); + cur_ = static_cast ( + doc_->importNode (doc->getDocumentElement (), true)); + doc_->getDocumentElement ()->appendChild (cur_); + } + else + { + doc_ = move (doc); + cur_ = doc_->getDocumentElement (); + } + + // This handles the case where we get both start and + // end events in peek(). In this case the element is fully parsed + // and next() has nothing to do. + // + if (depth_ != next_depth_) + { + peek_ = false; + return move (doc_); + } + } + + bool r (true); + + // If we peeked, then we have already seen the start tag and our + // return depth is one above the current depth. + // + size_t d (peek_ ? depth_ - 1 : depth_); + whitespace_depth_ = d; + + peek_ = false; + + // Keep calling parseNext() until we either move to a greater depth or + // get a document. 
This way we skip the text (presumably whitespaces) + // that may be preceding this chunk. + // + while (r && depth_ == d && doc_.get () == 0) + { + r = parser_->parseNext (token_); + error_handler_.throw_if_failed > (); + } + + if (!r) // parseNext() reported that there is nothing more to parse. + return document_ptr (); + + // If we are not at our start depth, keep calling parseNext() until we + // get there again. + // + while (r && depth_ != d) + { + r = parser_->parseNext (token_); + error_handler_.throw_if_failed > (); + } + + if (!r) + return document_ptr (); + + return move (doc_); +} + +// DOM builder. +// + +void parser_impl:: +startElement (const XMLCh* const uri, + const XMLCh* const /*lname*/, + const XMLCh* const qname, + const Attributes& attr) +{ + if (doc_.get () == 0) + { + doc_.reset (dom_impl_.createDocument (uri, qname, 0)); + cur_ = doc_->getDocumentElement (); + } + else + { + DOMElement* e = doc_->createElementNS (uri, qname); + cur_->appendChild (e); + cur_ = e; + } + + // Set attributes. + // + for (XMLSize_t i (0), end (attr.getLength()); i < end; ++i) + { + const XMLCh* qn (attr.getQName (i)); + const XMLCh* ns (attr.getURI (i)); + + // When SAX2 reports the xmlns attribute, it does not include + // the proper attribute namespace. So we have to detect and + // handle this case. + // + if (XMLString::equals (qn, XMLUni::fgXMLNSString)) + ns = XMLUni::fgXMLNSURIName; + + cur_->setAttributeNS (ns, qn, attr.getValue (i)); + } + + depth_++; + + if (peek_) + next_depth_ = depth_; +} + +void parser_impl:: +endElement (const XMLCh* const /*uri*/, + const XMLCh* const /*lname*/, + const XMLCh* const /*qname*/) +{ + // We have an element parent only on depth 2 or greater. + // + if (--depth_ > 1) + cur_ = static_cast (cur_->getParentNode ()); +} + +void parser_impl:: +characters (const XMLCh* const s, const XMLSize_t length) +{ + const XMLCh empty[] = {chNull}; + + // Ignore text content (presumably whitespaces) while looking for + // the next element. 
+ // + if (depth_ > whitespace_depth_) + { + DOMText* t = doc_->createTextNode (empty); + static_cast (t)->appendData (s, length); + cur_->appendChild (t); + } +} + +// +// parser +// + +parser:: +~parser () +{ +} + +parser:: +parser (const XMLByte* grammar, size_t grammar_size) + : impl_ (new parser_impl (grammar, grammar_size)) +{ +} + +void parser:: +start (istream& is, const string& id, bool val) +{ + return impl_->start (is, id, val); +} + +document_ptr parser:: +peek () +{ + return impl_->peek (); +} + +document_ptr parser:: +next (document_ptr doc, document_ptr outer_doc) +{ + return impl_->next (move (doc), move (outer_doc)); +} diff --git a/xsd-examples/cxx/tree/streaming/parser.hxx b/xsd-examples/cxx/tree/streaming/parser.hxx new file mode 100644 index 0000000..605d236 --- /dev/null +++ b/xsd-examples/cxx/tree/streaming/parser.hxx @@ -0,0 +1,67 @@ +#ifndef PARSER_HXX +#define PARSER_HXX + +#include +#include +#include // std::size_t +#include // std::unique_ptr + +#include + +#include + +class parser_impl; + +class parser +{ +public: + // We can specify embedded XML Schema grammar to be used by the parser + // that was created by the xsdbin utility from the 'embedded' example. + // + parser (const XMLByte* grammar = 0, std::size_t grammar_size = 0); + ~parser (); + + // The start function prepares everything for parsing a new document. + // + void + start (std::istream& is, const std::string& id, bool validate); + + typedef xsd::cxx::xml::dom::unique_ptr document_ptr; + + // The peek function parses just the next element (ignoring any + // preceding content assuming it is whitespace) without parsing + // any of its nested content (but it includes the element's + // attributes). It returns NULL if there are no more elements + // at this level (there could still be on outer levels in case + // of nested streaming). 
+ // + document_ptr + peek (); + + // The next function parses (or finishes parsing after peek) the + // next element including its nested content. It returns NULL if + // there are no more elements at this level (there could still + // be on outer levels in case of nested streaming). + // + // If doc is not NULL, then it should be the document returned + // by peek(). That is, a document with only the root element. + // In this case next() finishes parsing this element. + // + // If outer_doc is not NULL, then next() will first add doc to + // outer_doc as a child of the document root. + // + document_ptr + next (document_ptr doc = document_ptr (), + document_ptr outer_doc = document_ptr ()); + +private: + parser (const parser&); + + parser& + operator= (const parser&); + +private: + std::unique_ptr impl_; +}; + +#endif // PARSER_HXX diff --git a/xsd-examples/cxx/tree/streaming/position.xml b/xsd-examples/cxx/tree/streaming/position.xml new file mode 100644 index 0000000..3308306 --- /dev/null +++ b/xsd-examples/cxx/tree/streaming/position.xml @@ -0,0 +1,29 @@ + + + + + + +
+ Lion's Head + rock +
+ + + + + + + + + + +
diff --git a/xsd-examples/cxx/tree/streaming/position.xsd b/xsd-examples/cxx/tree/streaming/position.xsd new file mode 100644 index 0000000..0fbcf87 --- /dev/null +++ b/xsd-examples/cxx/tree/streaming/position.xsd @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/xsd-examples/cxx/tree/streaming/serializer.cxx b/xsd-examples/cxx/tree/streaming/serializer.cxx new file mode 100644 index 0000000..b903a49 --- /dev/null +++ b/xsd-examples/cxx/tree/streaming/serializer.cxx @@ -0,0 +1,636 @@ +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include "serializer.hxx" + +using namespace std; +using namespace xercesc; + +namespace xml = xsd::cxx::xml; +namespace tree = xsd::cxx::tree; + +static const XMLCh gEOLSeq[] = +{ + chLF, chNull +}; + +static const XMLCh gUTF8[] = +{ + chLatin_U, chLatin_T, chLatin_F, chDash, chDigit_8, chNull +}; + +static const XMLCh gEndElement[] = +{ + chOpenAngle, chForwardSlash, chNull +}; + +static const int DISCARD_DEFAULT_CONTENT_ID = 0x1; +static const int ENTITIES_ID = 0x2; +static const int FORMAT_PRETTY_PRINT_1ST_LEVEL_ID = 0xA; + +class StreamingDOMSerializer: public DOMLSSerializerImpl +{ +public: + StreamingDOMSerializer (MemoryManager* manager) + : DOMLSSerializerImpl (manager) + { + } + + bool + startOpen (const DOMElement* e, DOMLSOutput* const destination) + { + const DOMDocument* docu (e->getOwnerDocument ()); + assert (docu != 0); + + // Code adapted from DOMLSSerializerImpl::write(). 
+ // + target_ = destination->getByteStream(); + + fEncodingUsed = gUTF8; + + const XMLCh* lsEncoding=destination->getEncoding(); + if (lsEncoding && *lsEncoding) + { + fEncodingUsed = lsEncoding; + } + else if (docu) + { + const XMLCh* tmpEncoding = docu->getInputEncoding(); + + if ( tmpEncoding && *tmpEncoding) + { + fEncodingUsed = tmpEncoding; + } + else + { + tmpEncoding = docu->getXmlEncoding(); + + if ( tmpEncoding && *tmpEncoding) + { + fEncodingUsed = tmpEncoding; + } + } + } + + fNewLineUsed = (fNewLine && *fNewLine)? fNewLine : gEOLSeq; + + fDocumentVersion = (docu->getXmlVersion() && *(docu->getXmlVersion())) + ? docu->getXmlVersion() + : XMLUni::fgVersion1_0; + + fErrorCount = 0; + + fLineFeedInTextNodePrinted = false; + fLastWhiteSpaceInTextNode = 0; + + level_ = 0; + namespace_map_.clear (); + + fFormatter = new (fMemoryManager) XMLFormatter( fEncodingUsed + ,fDocumentVersion + ,target_ + ,XMLFormatter::NoEscapes + ,XMLFormatter::UnRep_CharRef + ,fMemoryManager); + formatter_.reset (fFormatter); + + // Write out the XML declaration, etc. Here we assume that the document + // has no children (i.e., no root element). + // + processNode (docu, 0); + fLineFeedInTextNodePrinted = true; + + return writeOpen (e); + } + + bool + writeOpen (const DOMElement* e) + { + // Code adapted from the first part of ELEMENT_NODE case in + // DOMLSSerializerImpl::processNode(). 
+ // + + if (!fLineFeedInTextNodePrinted) + { + if(level_ == 1 && getFeature(FORMAT_PRETTY_PRINT_1ST_LEVEL_ID)) + printNewLine(); + + printNewLine(); + } + else + { + fLineFeedInTextNodePrinted = false; + } + + printIndent(level_); + + RefHashTableOf* namespaceMap = NULL; + + *fFormatter << XMLFormatter::NoEscapes << chOpenAngle << + e->getNodeName (); + + setURCharRef(); + DOMNamedNodeMap *attributes = e->getAttributes(); + XMLSize_t attrCount = attributes->getLength(); + + const XMLCh* prefix = e->getPrefix(); + const XMLCh* uri = e->getNamespaceURI(); + if((uri && uri[0]) || + ((prefix==0 || prefix[0]==0) && isDefaultNamespacePrefixDeclared())) + { + if(prefix==0 || prefix[0]==0) + prefix=XMLUni::fgZeroLenString; + if(!isNamespaceBindingActive(prefix, uri)) + { + if(namespaceMap==NULL) + { + namespaceMap=new (fMemoryManager) RefHashTableOf(12, false, fMemoryManager); + fNamespaceStack->addElement(namespaceMap); + } + namespaceMap->put((void*)prefix,(XMLCh*)uri); + *fFormatter << XMLFormatter::NoEscapes + << chSpace << XMLUni::fgXMLNSString; + + if(!XMLString::equals(prefix,XMLUni::fgZeroLenString)) + *fFormatter << chColon << prefix; + + *fFormatter << chEqual << chDoubleQuote + << XMLFormatter::AttrEscapes + << uri + << XMLFormatter::NoEscapes + << chDoubleQuote; + } + } + + bool discard = getFeature(DISCARD_DEFAULT_CONTENT_ID); + for (XMLSize_t i = 0; i < attrCount; i++) + { + DOMAttr* attribute = (DOMAttr*)attributes->item(i); + + if (discard && !((DOMAttr*)attribute )->getSpecified()) + continue; + + // if this attribute is a namespace declaration, add it to the namespace map for the current level + const XMLCh* ns = attribute->getNamespaceURI(); + if (ns != 0 ) + { + if(XMLString::equals(ns, XMLUni::fgXMLNSURIName)) + { + if(namespaceMap==NULL) + { + namespaceMap=new (fMemoryManager) RefHashTableOf(12, false, fMemoryManager); + fNamespaceStack->addElement(namespaceMap); + } + const XMLCh* nsPrefix = attribute->getLocalName(); + 
if(XMLString::equals(attribute->getNodeName(),XMLUni::fgXMLNSString)) + nsPrefix = XMLUni::fgZeroLenString; + if(namespaceMap->containsKey((void*)nsPrefix)) + continue; + namespaceMap->put((void*)attribute->getLocalName(),(XMLCh*)attribute->getNodeValue()); + } + else if(!XMLString::equals(ns, XMLUni::fgXMLURIName)) + { + // check if the namespace for the current node is already defined + const XMLCh* prefix = attribute->getPrefix(); + if(prefix && prefix[0]) + { + const XMLCh* uri = attribute->getNamespaceURI(); + if(!isNamespaceBindingActive(prefix, uri)) + { + if(namespaceMap==NULL) + { + namespaceMap=new (fMemoryManager) RefHashTableOf(12, false, fMemoryManager); + fNamespaceStack->addElement(namespaceMap); + } + namespaceMap->put((void*)prefix,(XMLCh*)uri); + + *fFormatter << XMLFormatter::NoEscapes + << chSpace << XMLUni::fgXMLNSString << chColon << prefix + << chEqual << chDoubleQuote + << XMLFormatter::AttrEscapes + << uri + << XMLFormatter::NoEscapes + << chDoubleQuote; + } + } + } + } + + if (XMLString::equals(ns, XMLUni::fgXMLNSURIName) || checkFilter(attribute) == DOMNodeFilter::FILTER_ACCEPT) + { + *fFormatter << XMLFormatter::NoEscapes + << chSpace << attribute->getNodeName() + << chEqual << chDoubleQuote + << XMLFormatter::AttrEscapes; + + if (getFeature(ENTITIES_ID)) + { + DOMNode* child = attribute->getFirstChild(); + while( child != 0) + { + if(child->getNodeType()==DOMNode::TEXT_NODE) + *fFormatter << child->getNodeValue(); + else if(child->getNodeType()==DOMNode::ENTITY_REFERENCE_NODE) + *fFormatter << XMLFormatter::NoEscapes + << chAmpersand << child->getNodeName() << chSemiColon + << XMLFormatter::AttrEscapes; + child = child->getNextSibling(); + } + } + else + *fFormatter << attribute->getNodeValue(); + + *fFormatter << XMLFormatter::NoEscapes << chDoubleQuote; + } + } + + *fFormatter << XMLFormatter::NoEscapes << chCloseAngle; + + // Keep track of whether we have added a namespace map for this + // element. Used to pop it in writeClose(). 
+ // + namespace_map_.push_back (namespaceMap != 0); + + level_++; + + DOMNode* child = e->getFirstChild(); + while (child != 0) + { + processNode (child, level_); + child = child->getNextSibling(); + } + + return fErrorCount == 0; + } + + bool + writeClose (const XMLCh* name) + { + // Code adapted from the second part of ELEMENT_NODE case in + // DOMLSSerializerImpl::processNode(). + // + level_--; + + // Assume we are not on the same line (nodeLine != fCurrentLine). + // + { + if (!fLineFeedInTextNodePrinted) + { + printNewLine(); + } + else + { + fLineFeedInTextNodePrinted = false; + } + + if(level_ == 0 && getFeature(FORMAT_PRETTY_PRINT_1ST_LEVEL_ID)) + printNewLine(); + + printIndent(level_); + } + + *fFormatter << XMLFormatter::NoEscapes << gEndElement << + name << chCloseAngle; + + if (namespace_map_.back ()) + fNamespaceStack->removeLastElement(); + + namespace_map_.pop_back (); + + if (level_ == 0) + { + printNewLine(); + target_->flush (); + } + + return fErrorCount == 0; + } + + bool + write (const DOMElement* e) + { + processNode (e, level_); + return fErrorCount == 0; + } + + using DOMLSSerializerImpl::write; // Whole document. + +public: + // Update the namespace stack to point to the strings from the + // new document's string pool. 
+ // + void + update_namespace_stack (DOMDocument& d) + { + DOMDocumentImpl& di (dynamic_cast (d)); + + for (XMLSize_t i (0); i != fNamespaceStack->size (); ++i) + { + RefHashTableOf& t (*fNamespaceStack->elementAt (i)); + RefHashTableOfEnumerator e (&t, false, fMemoryManager); + while (e.hasMoreElements ()) + { + XMLCh* k ((XMLCh*) (e.nextElementKey ())); + XMLCh* v (t.get (k)); + t.put ((void*) (di.getPooledString (k)), + (XMLCh*) (di.getPooledString (v))); + } + } + } + +private: + XMLFormatTarget* target_; + std::unique_ptr formatter_; + int level_; + + std::vector namespace_map_; +}; + +class serializer_impl +{ +public: + typedef serializer::namespace_infomap namespace_infomap; + + serializer_impl (); + + void + start (ostream& os, const string& encoding); + + DOMElement* + create (const string& name, const namespace_infomap&); + + DOMElement* + create (const string& ns, const string& qname, const namespace_infomap&); + + void + serialize (xml::dom::unique_ptr); + + void + serialize_open (xml::dom::unique_ptr); + + void + serialize_close (const string&); + +private: + void + clear_document (); + +private: + bool start_; + + // Serializer. + // + xml::dom::unique_ptr out_; + xml::dom::unique_ptr serializer_; + + unique_ptr oft_; + + tree::error_handler error_handler_; + xml::dom::bits::error_handler_proxy error_proxy_; + + // DOM document that we use to create the elements. + // + DOMImplementation& dom_impl_; + xml::dom::unique_ptr doc_; + vector element_stack_; + + size_t element_count_; // Number of elements serialized using current doc. 
+ static const size_t element_count_limit_ = 500; +}; + +const XMLCh ls[] = {chLatin_L, chLatin_S, chNull}; + +serializer_impl:: +serializer_impl () + : error_proxy_ (error_handler_), + dom_impl_ (*DOMImplementationRegistry::getDOMImplementation (ls)) +{ + serializer_.reset ( + new (XMLPlatformUtils::fgMemoryManager) + StreamingDOMSerializer (XMLPlatformUtils::fgMemoryManager)); + + DOMConfiguration* conf (serializer_->getDomConfig ()); + conf->setParameter (XMLUni::fgDOMErrorHandler, &error_proxy_); + conf->setParameter (XMLUni::fgDOMXMLDeclaration, true); + conf->setParameter (XMLUni::fgDOMWRTDiscardDefaultContent, true); + conf->setParameter (XMLUni::fgDOMWRTFormatPrettyPrint, true); + conf->setParameter (XMLUni::fgDOMWRTXercesPrettyPrint, false); +} + +void serializer_impl:: +start (ostream& os, const string& encoding) +{ + element_stack_.clear (); + doc_.reset (dom_impl_.createDocument ()); + element_count_ = 0; + + error_handler_.reset (); + oft_.reset (new xml::dom::ostream_format_target (os)); + + out_.reset (dom_impl_.createLSOutput ()); + out_->setEncoding (xml::string (encoding).c_str ()); + out_->setByteStream (oft_.get ()); + + start_ = true; +} + +DOMElement* serializer_impl:: +create (const string& name, const namespace_infomap& map) +{ + DOMElement* r (doc_->createElement (xml::string (name).c_str ())); + + if (!map.empty ()) + xml::dom::add_namespaces (*r, map); + + // Add the element as the child of the stack "tip" so that it + // "sees" all the namespace declarations active from this point. 
+ // + if (!element_stack_.empty ()) + element_stack_.back ()->appendChild (r); + + return r; +} + +DOMElement* serializer_impl:: +create (const string& ns, const string& qname, const namespace_infomap& map) +{ + DOMElement* r ( + doc_->createElementNS ( + xml::string (ns).c_str (), xml::string (qname).c_str ())); + + if (!map.empty ()) + xml::dom::add_namespaces (*r, map); + + // Add the element as the child of the stack "tip" so that it + // "sees" all the namespace declarations active from this point. + // + if (!element_stack_.empty ()) + element_stack_.back ()->appendChild (r); + + return r; +} + +void serializer_impl:: +serialize (xml::dom::unique_ptr p) +{ + DOMElement* e (p.get ()); + + if (start_) + { + serializer_->write (e, out_.get ()); + start_ = false; + } + else + serializer_->write (e); + + error_handler_.throw_if_failed > (); + + // Remove this element from its parent before we release. + // + if (!element_stack_.empty ()) + element_stack_.back ()->removeChild (e); + + p.reset (); // Release it before we may clear the document below. + + if (element_count_++ > element_count_limit_) + clear_document (); +} + +void serializer_impl:: +serialize_open (xml::dom::unique_ptr p) +{ + DOMElement* e (p.get ()); + + if (start_) + { + serializer_->startOpen (e, out_.get ()); + start_ = false; + } + else + serializer_->writeOpen (e); + + error_handler_.throw_if_failed > (); + + // Add this element to the element stack. serialize_close() is + // responsible for its removal and releasing. + // + element_stack_.push_back (e); + p.release (); +} + +void serializer_impl:: +serialize_close (const string& name) +{ + serializer_->writeClose (xml::string (name).c_str ()); + error_handler_.throw_if_failed > (); + + // Release the element. 
+ // + DOMElement* e (element_stack_.back ()); + element_stack_.pop_back (); + + if (!element_stack_.empty ()) + element_stack_.back ()->removeChild (e); + + e->release (); + + if (element_count_++ > element_count_limit_) + clear_document (); +} + +void serializer_impl:: +clear_document () +{ + // Re-create the document in order to force deallocation of its + // internal heap. While Xerces-C++ DOM tries to re-use memory, + // it still accumulates no longer used memory blocks. + // + xml::dom::unique_ptr doc (dom_impl_.createDocument ()); + + if (!element_stack_.empty ()) + { + DOMElement* e ( + static_cast ( + doc->importNode (element_stack_.front (), true))); + + for (vector::iterator i (element_stack_.begin ()); + i != element_stack_.end (); + ++i) + { + *i = e; + e = static_cast (e->getFirstChild ()); + } + } + + // Update the namespace stack to use the new document. + // + serializer_->update_namespace_stack (*doc); + + doc_ = move (doc); + element_count_ = 0; +} + +// +// serializer +// + +serializer:: +~serializer () +{ +} + +serializer:: +serializer () + : impl_ (new serializer_impl) +{ +} + +void serializer:: +start (ostream& os, const string& encoding) +{ + impl_->start (os, encoding); +} + +DOMElement* serializer:: +create (const string& name, const namespace_infomap& map) +{ + return impl_->create (name, map); +} + +DOMElement* serializer:: +create (const string& ns, const string& qname, const namespace_infomap& map) +{ + return impl_->create (ns, qname, map); +} + +void serializer:: +serialize (xml::dom::unique_ptr e) +{ + impl_->serialize (move (e)); +} + +void serializer:: +serialize_open (xml::dom::unique_ptr e) +{ + impl_->serialize_open (move (e)); +} + +void serializer:: +serialize_close (const string& name) +{ + impl_->serialize_close (name); +} diff --git a/xsd-examples/cxx/tree/streaming/serializer.hxx b/xsd-examples/cxx/tree/streaming/serializer.hxx new file mode 100644 index 0000000..585bd76 --- /dev/null +++ 
b/xsd-examples/cxx/tree/streaming/serializer.hxx @@ -0,0 +1,209 @@ +// file : cxx/tree/streaming/serializer.hxx +// author : Boris Kolpackov +// copyright : not copyrighted - public domain + +#ifndef SERIALIZER_HXX +#define SERIALIZER_HXX + +#include +#include +#include // std::unique_ptr + +#include + +#include +#include // namespace_infomap + +class serializer_impl; + +class serializer +{ +public: + typedef xsd::cxx::xml::dom::namespace_infomap namespace_infomap; + + ~serializer (); + serializer (); + + // Start the serialization process. + // + void + start (std::ostream& is, const std::string& encoding = "UTF-8"); + + // Serialize next object model fragment into an element with the specified + // name. + // + template + void + next (const std::string& name, const T& x); + + // Serialize next object model fragment into an element with the specified + // name and namespace declarations. + // + template + void + next (const std::string& name, const namespace_infomap&, const T& x); + + // Serialize next object model fragment into an element with the specified + // namespace and qualified name. + // + template + void + next (const std::string& ns, const std::string& name, const T& x); + + // Serialize next object model fragment into an element with the specified + // namespace and qualified name as well as namespace declarations. + // + template + void + next (const std::string& ns, + const std::string& name, + const namespace_infomap&, + const T& x); + + // The next_open/close functions are like next() but split into two steps. + // next_open() serializes the object model fragment into an element leaving + // it open while next_close() closes the element. 
+ // + template + void + next_open (const std::string& name, const T& x); + + template + void + next_open (const std::string& name, const namespace_infomap&, const T& x); + + template + void + next_open (const std::string& ns, const std::string& name, const T& x); + + template + void + next_open (const std::string& ns, + const std::string& name, + const namespace_infomap&, + const T& x); + + void + next_close (const std::string& name); + +private: + serializer (const serializer&); + + serializer& + operator= (const serializer&); + +private: + xercesc::DOMElement* + create (const std::string& name, const namespace_infomap&); + + xercesc::DOMElement* + create (const std::string& ns, + const std::string& name, + const namespace_infomap&); + + void + serialize (xsd::cxx::xml::dom::unique_ptr); + + void + serialize_open (xsd::cxx::xml::dom::unique_ptr); + + void + serialize_close (const std::string& name); + +private: + std::unique_ptr impl_; +}; + +template +inline void serializer:: +next (const std::string& name, const T& x) +{ + xsd::cxx::xml::dom::unique_ptr e ( + create (name, namespace_infomap ())); + *e << x; + serialize (std::move (e)); +} + +template +inline void serializer:: +next (const std::string& name, const namespace_infomap& map, const T& x) +{ + xsd::cxx::xml::dom::unique_ptr e (create (name, map)); + *e << x; + serialize (std::move (e)); +} + +template +inline void serializer:: +next (const std::string& ns, const std::string& name, const T& x) +{ + xsd::cxx::xml::dom::unique_ptr e ( + create (ns, name, namespace_infomap ())); + *e << x; + serialize (std::move (e)); +} + +template +inline void serializer:: +next (const std::string& ns, + const std::string& name, + const namespace_infomap& map, + const T& x) +{ + xsd::cxx::xml::dom::unique_ptr e ( + create (ns, name, map)); + + *e << x; + serialize (std::move (e)); +} + +template +inline void serializer:: +next_open (const std::string& name, const T& x) +{ + xsd::cxx::xml::dom::unique_ptr e ( + create 
(name, namespace_infomap ())); + *e << x; + serialize_open (std::move (e)); +} + +template +inline void serializer:: +next_open (const std::string& name, const namespace_infomap& map, const T& x) +{ + xsd::cxx::xml::dom::unique_ptr e (create (name, map)); + *e << x; + serialize_open (std::move (e)); +} + +template +inline void serializer:: +next_open (const std::string& ns, const std::string& name, const T& x) +{ + xsd::cxx::xml::dom::unique_ptr e ( + create (ns, name, namespace_infomap ())); + *e << x; + serialize_open (std::move (e)); +} + +template +inline void serializer:: +next_open (const std::string& ns, + const std::string& name, + const namespace_infomap& map, + const T& x) +{ + xsd::cxx::xml::dom::unique_ptr e ( + create (ns, name, map)); + + *e << x; + serialize_open (std::move (e)); +} + +inline void serializer:: +next_close (const std::string& name) +{ + serialize_close (name); +} + +#endif // SERIALIZER_HXX diff --git a/xsd-examples/cxx/tree/streaming/testscript b/xsd-examples/cxx/tree/streaming/testscript new file mode 100644 index 0000000..d3c744d --- /dev/null +++ b/xsd-examples/cxx/tree/streaming/testscript @@ -0,0 +1,10 @@ +# file : cxx/tree/streaming/testscript +# license : not copyrighted - public domain + +: position +: +{ + $* $src_base/position.xml 2>| &out.xml; + echo '' >|; + cat out.xml >| +} -- cgit v1.1