From 741f84835e207e219eb3093eaad845c941ba2818 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Wed, 25 Nov 2009 17:15:57 +0200 Subject: New example that shows how to embed schema grammar into application --- examples/cxx/tree/README | 5 + examples/cxx/tree/caching/README | 4 +- examples/cxx/tree/embedded/README | 48 ++ examples/cxx/tree/embedded/driver.cxx | 214 +++++++++ .../cxx/tree/embedded/grammar-input-stream.cxx | 115 +++++ .../cxx/tree/embedded/grammar-input-stream.hxx | 53 +++ examples/cxx/tree/embedded/library.xml | 53 +++ examples/cxx/tree/embedded/library.xsd | 73 +++ examples/cxx/tree/embedded/makefile | 122 +++++ examples/cxx/tree/embedded/xsdbin.cxx | 505 +++++++++++++++++++++ examples/cxx/tree/makefile | 8 +- 11 files changed, 1194 insertions(+), 6 deletions(-) create mode 100644 examples/cxx/tree/embedded/README create mode 100644 examples/cxx/tree/embedded/driver.cxx create mode 100644 examples/cxx/tree/embedded/grammar-input-stream.cxx create mode 100644 examples/cxx/tree/embedded/grammar-input-stream.hxx create mode 100644 examples/cxx/tree/embedded/library.xml create mode 100644 examples/cxx/tree/embedded/library.xsd create mode 100644 examples/cxx/tree/embedded/makefile create mode 100644 examples/cxx/tree/embedded/xsdbin.cxx (limited to 'examples') diff --git a/examples/cxx/tree/README b/examples/cxx/tree/README index b6993d9..e2ba8de 100644 --- a/examples/cxx/tree/README +++ b/examples/cxx/tree/README @@ -43,6 +43,11 @@ caching Shows how to parse several XML documents while reusing the underlying XML parser and caching the schemas used for validation. +embedded + Shows how to embed the binary representation of the schema grammar + into an application and then use it with the C++/Tree mapping to + parse and validate XML documents. + performance Measures the performance of parsing and serialization. 
This example also shows how to structure your code to achieve the maximum diff --git a/examples/cxx/tree/caching/README b/examples/cxx/tree/caching/README index 1bbb590..64e5a1a 100644 --- a/examples/cxx/tree/caching/README +++ b/examples/cxx/tree/caching/README @@ -1,4 +1,4 @@ -This example shows how to use the the C++/Tree mapping to parse several +This example shows how to use the C++/Tree mapping to parse several XML documents while reusing the underlying XML parser and caching the schemas used for validation. @@ -22,7 +22,7 @@ driver.cxx ten iterations that parse the input file to a DOM document using the DOM parser and call one of the parsing functions that constructs the object model from this DOM document. On each iteration the driver - prints a number of book in the object model to STDERR. + prints a number of books in the object model to STDERR. To run the example on the sample XML instance document simply execute: diff --git a/examples/cxx/tree/embedded/README b/examples/cxx/tree/embedded/README new file mode 100644 index 0000000..266a8ff --- /dev/null +++ b/examples/cxx/tree/embedded/README @@ -0,0 +1,48 @@ +This example shows how to embed the binary representation of the schema +grammar into an application and then use it with the C++/Tree mapping to +parse and validate XML documents. This example is similar to the 'caching' +example except that it loads the binary representation of the schemas +embedded into the application instead of pre-parsing external schema files. + +The example consists of the following files: + +xsdbin.cxx + Tool for converting one or more XML Schema files to the compressed binary + representation. The output is written as a pair of C++ source files + containing the array with the binary data. Use the --help option to see + the tool's usage information. + +library.xsd + XML Schema which describes a library of books. + +library.xml + Sample XML instance document. 
+ +library.hxx +library.cxx + C++ types that represent the given vocabulary and a set of parsing + functions that convert XML instance documents to a tree-like in-memory + object model. These are generated by the XSD compiler from library.xsd. + +library-schema.hxx +library-schema.cxx + Binary representation of the library.xsd schema. These files are generated + by the xsdbin tool. + +grammar-input-stream.hxx +grammar-input-stream.cxx + Input stream implementation with the special-purpose schema grammar + decompression algorithm. It is used to load the binary schema representation + produced by the xsdbin tool. + +driver.cxx + Driver for the example. It first sets up the Xerces-C++ DOM parser and + loads the embedded binary schema grammar for validation. It then performs + ten iterations that parse the input file to a DOM document using the DOM + parser and call one of the parsing functions that constructs the object + model from this DOM document. On each iteration the driver prints the number + of books in the object model to STDERR. 
+ +To run the example on the sample XML instance document simply execute: + +$ ./driver library.xml diff --git a/examples/cxx/tree/embedded/driver.cxx b/examples/cxx/tree/embedded/driver.cxx new file mode 100644 index 0000000..3c49702 --- /dev/null +++ b/examples/cxx/tree/embedded/driver.cxx @@ -0,0 +1,214 @@ +// file : examples/cxx/tree/embedded/driver.cxx +// author : Boris Kolpackov +// copyright : not copyrighted - public domain + +#include // std::auto_ptr +#include +#include + +#include +#include // chLatin_* +#include +#include // xercesc::Grammar +#include + +#if _XERCES_VERSION >= 30000 +# include +#else +# include +#endif + +#include +#include +#include +#include + +#include + +#include "library.hxx" +#include "library-schema.hxx" +#include "grammar-input-stream.hxx" + +using namespace std; + +int +main (int argc, char* argv[]) +{ + if (argc != 2) + { + cerr << "usage: " << argv[0] << " library.xml" << endl; + return 1; + } + + int r (0); + + // We need to initialize the Xerces-C++ runtime because we + // are doing the XML-to-DOM parsing ourselves. + // + xercesc::XMLPlatformUtils::Initialize (); + + try + { + using namespace xercesc; + namespace xml = xsd::cxx::xml; + namespace tree = xsd::cxx::tree; + + // Create and load the grammar pool. + // + MemoryManager* mm (XMLPlatformUtils::fgMemoryManager); + + auto_ptr gp (new XMLGrammarPoolImpl (mm)); + + try + { + grammar_input_stream is (library_schema, sizeof (library_schema)); + gp->deserializeGrammars(&is); + } + catch(const XSerializationException& e) + { + cerr << "unable to load schema: " << + xml::transcode (e.getMessage ()) << endl; + return 1; + } + + // Get an implementation of the Load-Store (LS) interface. + // + const XMLCh ls_id [] = {chLatin_L, chLatin_S, chNull}; + + DOMImplementation* impl ( + DOMImplementationRegistry::getDOMImplementation (ls_id)); + +#if _XERCES_VERSION >= 30000 + + // Xerces-C++ 3.0.0 and later. 
+ // + xml::dom::auto_ptr parser ( + impl->createLSParser ( + DOMImplementationLS::MODE_SYNCHRONOUS, 0, mm, gp.get ())); + + DOMConfiguration* conf (parser->getDomConfig ()); + + // Discard comment nodes in the document. + // + conf->setParameter (XMLUni::fgDOMComments, false); + + // Enable datatype normalization. + // + conf->setParameter (XMLUni::fgDOMDatatypeNormalization, true); + + // Do not create EntityReference nodes in the DOM tree. No + // EntityReference nodes will be created, only the nodes + // corresponding to their fully expanded substitution text + // will be created. + // + conf->setParameter (XMLUni::fgDOMEntities, false); + + // Perform namespace processing. + // + conf->setParameter (XMLUni::fgDOMNamespaces, true); + + // Do not include ignorable whitespace in the DOM tree. + // + conf->setParameter (XMLUni::fgDOMElementContentWhitespace, false); + + // Enable validation. + // + conf->setParameter (XMLUni::fgDOMValidate, true); + conf->setParameter (XMLUni::fgXercesSchema, true); + conf->setParameter (XMLUni::fgXercesSchemaFullChecking, false); + + // Xerces-C++ 3.1.0 is the first version with working multi import + // support. + // +#if _XERCES_VERSION >= 30100 + conf->setParameter (XMLUni::fgXercesHandleMultipleImports, true); +#endif + + // Use the loaded grammar during parsing. + // + conf->setParameter (XMLUni::fgXercesUseCachedGrammarInParse, true); + + // Disable loading schemas via other means (e.g., schemaLocation). + // + conf->setParameter (XMLUni::fgXercesLoadSchema, false); + + // We will release the DOM document ourselves. + // + conf->setParameter (XMLUni::fgXercesUserAdoptsDOMDocument, true); + + // Set error handler. + // + tree::error_handler eh; + xml::dom::bits::error_handler_proxy ehp (eh); + conf->setParameter (XMLUni::fgDOMErrorHandler, &ehp); + +#else // _XERCES_VERSION >= 30000 + + // Same as above but for Xerces-C++ 2 series. 
+ // + xml::dom::auto_ptr parser ( + impl->createDOMBuilder( + DOMImplementationLS::MODE_SYNCHRONOUS, 0, mm, gp.get ())); + + + parser->setFeature (XMLUni::fgDOMComments, false); + parser->setFeature (XMLUni::fgDOMDatatypeNormalization, true); + parser->setFeature (XMLUni::fgDOMEntities, false); + parser->setFeature (XMLUni::fgDOMNamespaces, true); + parser->setFeature (XMLUni::fgDOMWhitespaceInElementContent, false); + parser->setFeature (XMLUni::fgDOMValidation, true); + parser->setFeature (XMLUni::fgXercesSchema, true); + parser->setFeature (XMLUni::fgXercesSchemaFullChecking, false); + parser->setFeature (XMLUni::fgXercesUseCachedGrammarInParse, true); + parser->setFeature (XMLUni::fgXercesUserAdoptsDOMDocument, true); + + tree::error_handler eh; + xml::dom::bits::error_handler_proxy ehp (eh); + parser->setErrorHandler (&ehp); + +#endif // _XERCES_VERSION >= 30000 + + // Parse XML documents. + // + for (unsigned long i (0); i < 10; ++i) + { + ifstream ifs; + ifs.exceptions (ifstream::badbit | ifstream::failbit); + ifs.open (argv[1]); + + // Wrap the standard input stream. + // + xml::sax::std_input_source isrc (ifs, argv[1]); + Wrapper4InputSource wrap (&isrc, false); + + // Parse XML to DOM. + // +#if _XERCES_VERSION >= 30000 + xml_schema::dom::auto_ptr doc (parser->parse (&wrap)); +#else + xml_schema::dom::auto_ptr doc (parser->parse (wrap)); +#endif + + eh.throw_if_failed (); + + // Parse DOM to the object model. 
+ // + auto_ptr c (library::catalog_ (*doc)); + + cerr << "catalog with " << c->book ().size () << " books" << endl; + } + } + catch (const xml_schema::exception& e) + { + cerr << e << endl; + r = 1; + } + catch (const std::ios_base::failure&) + { + cerr << argv[1] << ": unable to open or read failure" << endl; + r = 1; + } + + xercesc::XMLPlatformUtils::Terminate (); + return r; +} diff --git a/examples/cxx/tree/embedded/grammar-input-stream.cxx b/examples/cxx/tree/embedded/grammar-input-stream.cxx new file mode 100644 index 0000000..0c94ea6 --- /dev/null +++ b/examples/cxx/tree/embedded/grammar-input-stream.cxx @@ -0,0 +1,115 @@ +// file : examples/cxx/tree/embedded/grammar-input-stream.cxx +// author : Boris Kolpackov +// copyright : not copyrighted - public domain + +#include +#include "grammar-input-stream.hxx" + +grammar_input_stream:: +grammar_input_stream (const XMLByte* data, std::size_t size) + : data_ (data), + size_ (size), + pos_ (0), + vpos_ (0), + cseq_ (0), + add_zero_ (false) +{ +} + +#if _XERCES_VERSION >= 30000 +XMLFilePos grammar_input_stream:: +curPos () const +{ + return static_cast (vpos_); +} +#else +unsigned int grammar_input_stream:: +curPos () const +{ + return static_cast (vpos_); +} +#endif + +#if _XERCES_VERSION >= 30000 +XMLSize_t grammar_input_stream:: +readBytes (XMLByte* const buf, const XMLSize_t size) +#else +unsigned int grammar_input_stream:: +readBytes (XMLByte* const buf, const unsigned int size) +#endif +{ + std::size_t i (0); + + // Add a zero from the alternating sequence if it didn't + // fit on the previous read. + // + if (add_zero_) + { + buf[i++] = 0; + add_zero_ = false; + } + + // If we have an unfinished sequential sequence, output it now. + // + if (cseq_ != 0 && !alt_) + { + for (; cseq_ != 0 && i < size; --cseq_) + buf[i++] = 0; + } + + for (; i < size && pos_ < size_;) + { + XMLByte b = buf[i++] = data_[pos_++]; + + // See if we are in a compression sequence. 
+ // + if (cseq_ != 0) + { + if (i < size) + buf[i++] = 0; + else + add_zero_ = true; // Add it on the next read. + + cseq_--; + continue; + } + + // If we are not in a compression sequence and this byte is + // not zero then we are done. + // + if (b != 0) + continue; + + // We have a zero. + // + assert (pos_ < size_); // There has to be another byte. + unsigned char v (static_cast (data_[pos_++])); + alt_ = (v & 128) != 0; + cseq_ = v & 127; + + // If it is a sequential sequence, output as many zeros as + // we can. + // + if (!alt_) + { + for (; cseq_ != 0 && i < size; --cseq_) + buf[i++] = 0; + } + } + + vpos_ += i; + +#if _XERCES_VERSION >= 30000 + return static_cast (i); +#else + return static_cast (i); +#endif +} + +#if _XERCES_VERSION >= 30000 +const XMLCh* grammar_input_stream:: +getContentType () const +{ + return 0; +} +#endif diff --git a/examples/cxx/tree/embedded/grammar-input-stream.hxx b/examples/cxx/tree/embedded/grammar-input-stream.hxx new file mode 100644 index 0000000..a1b73c6 --- /dev/null +++ b/examples/cxx/tree/embedded/grammar-input-stream.hxx @@ -0,0 +1,53 @@ +// file : examples/cxx/tree/embedded/grammar-input-stream.hxx +// author : Boris Kolpackov +// copyright : not copyrighted - public domain + +#ifndef GRAMMAR_INPUT_STREAM_HXX +#define GRAMMAR_INPUT_STREAM_HXX + +#include +#include + +// Memory buffer input stream with the special-purpose schema +// grammar decompression. 
+// +class grammar_input_stream: public xercesc::BinInputStream +{ +public : + grammar_input_stream (const XMLByte* data, std::size_t size); + +#if _XERCES_VERSION >= 30000 + + virtual XMLFilePos + curPos () const; + + virtual XMLSize_t + readBytes (XMLByte* const buf, const XMLSize_t size); + + virtual const XMLCh* + getContentType () const; + +#else + + virtual unsigned int + curPos () const; + + virtual unsigned int + readBytes (XMLByte* const buf, const unsigned int size); + +#endif + +private : + const XMLByte* data_; + std::size_t size_; + std::size_t pos_; + std::size_t vpos_; + + // Compression data. + // + size_t cseq_; // Number of bytes left in a compression sequence. + bool alt_; // Alternating or sequential sequence. + bool add_zero_; // Add a zero on the next read. +}; + +#endif // GRAMMAR_INPUT_STREAM_HXX diff --git a/examples/cxx/tree/embedded/library.xml b/examples/cxx/tree/embedded/library.xml new file mode 100644 index 0000000..cb8faf3 --- /dev/null +++ b/examples/cxx/tree/embedded/library.xml @@ -0,0 +1,53 @@ + + + + + + + + 0679760806 + The Master and Margarita + fiction + + + Mikhail Bulgakov + 1891-05-15 + 1940-03-10 + + + + + + 0679600841 + War and Peace + history + + + Leo Tolstoy + 1828-09-09 + 1910-11-20 + + + + + + 0679420290 + Crime and Punishment + philosophy + + + Fyodor Dostoevsky + 1821-11-11 + 1881-02-09 + + + + diff --git a/examples/cxx/tree/embedded/library.xsd b/examples/cxx/tree/embedded/library.xsd new file mode 100644 index 0000000..f1b4dac --- /dev/null +++ b/examples/cxx/tree/embedded/library.xsd @@ -0,0 +1,73 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/examples/cxx/tree/embedded/makefile b/examples/cxx/tree/embedded/makefile new file mode 100644 index 0000000..633ca60 --- /dev/null +++ b/examples/cxx/tree/embedded/makefile @@ -0,0 +1,122 @@ +# file : examples/cxx/tree/embedded/makefile +# author : Boris Kolpackov +# 
copyright : Copyright (c) 2005-2009 Code Synthesis Tools CC +# license : GNU GPL v2 + exceptions; see accompanying LICENSE file + +include $(dir $(lastword $(MAKEFILE_LIST)))../../../../build/bootstrap.make + +xsd := library.xsd +cxx := driver.cxx grammar-input-stream.cxx + +obj := $(addprefix $(out_base)/,$(cxx:.cxx=.o) $(xsd:.xsd=.o) \ +$(xsd:.xsd=-schema.o)) +obj2 := $(out_base)/xsdbin.o +dep := $(obj:.o=.o.d) $(obj2:.o=.o.d) + +driver := $(out_base)/driver +xsdbin := $(out_base)/xsdbin +install := $(out_base)/.install +dist := $(out_base)/.dist +dist-win := $(out_base)/.dist-win +clean := $(out_base)/.clean + + +# Import. +# +$(call import,\ + $(scf_root)/import/libxerces-c/stub.make,\ + l: xerces_c.l,cpp-options: xerces_c.l.cpp-options) + + +# Build. +# +$(driver): $(obj) $(xerces_c.l) +$(xsdbin): $(obj2) $(xerces_c.l) + +$(obj) $(dep): cpp_options := -I$(src_root)/libxsd +$(obj) $(obj2) $(dep): $(xerces_c.l.cpp-options) + +genf := $(xsd:.xsd=.hxx) $(xsd:.xsd=.ixx) $(xsd:.xsd=.cxx) +gen := $(addprefix $(out_base)/,$(genf)) + +$(gen): xsd := $(out_root)/xsd/xsd +$(gen): xsd_options := +$(gen): $(out_root)/xsd/xsd + +genf += $(xsd:.xsd=-schema.hxx) $(xsd:.xsd=-schema.cxx) + +.PRECIOUS: $(out_base)/%-schema.hxx $(out_base)/%-schema.cxx +$(out_base)/%-schema.hxx $(out_base)/%-schema.cxx: $(src_base)/%.xsd $(xsdbin) + $(call message,xsdbin $<,$(xsdbin) --output-dir $(out_base) $<) + +$(call include-dep,$(dep)) + +# Convenience alias for default target. +# +$(out_base)/: $(driver) + + +# Install & Dist. 
+# +dist-common := $(out_base)/.dist-common + +$(install) $(dist) $(dist-win) $(dist-common): path := $(subst $(src_root)/,,$(src_base)) + +$(install): + $(call install-data,$(src_base)/README,$(install_doc_dir)/xsd/$(path)/README) + $(call install-data,$(src_base)/xsdbin.cxx,$(install_doc_dir)/xsd/$(path)/xsdbin.cxx) + $(call install-data,$(src_base)/driver.cxx,$(install_doc_dir)/xsd/$(path)/driver.cxx) + $(call install-data,$(src_base)/grammar-input-stream.hxx,$(install_doc_dir)/xsd/$(path)/grammar-input-stream.hxx) + $(call install-data,$(src_base)/grammar-input-stream.cxx,$(install_doc_dir)/xsd/$(path)/grammar-input-stream.cxx) + $(call install-data,$(src_base)/library.xsd,$(install_doc_dir)/xsd/$(path)/library.xsd) + $(call install-data,$(src_base)/library.xml,$(install_doc_dir)/xsd/$(path)/library.xml) + +$(dist-common): + $(call install-data,$(src_base)/xsdbin.cxx,$(dist_prefix)/$(path)/xsdbin.cxx) + $(call install-data,$(src_base)/driver.cxx,$(dist_prefix)/$(path)/driver.cxx) + $(call install-data,$(src_base)/grammar-input-stream.hxx,$(dist_prefix)/$(path)/grammar-input-stream.hxx) + $(call install-data,$(src_base)/grammar-input-stream.cxx,$(dist_prefix)/$(path)/grammar-input-stream.cxx) + $(call install-data,$(src_base)/library.xsd,$(dist_prefix)/$(path)/library.xsd) + $(call install-data,$(src_base)/library.xml,$(dist_prefix)/$(path)/library.xml) + +$(dist): $(dist-common) + $(call install-data,$(src_base)/README,$(dist_prefix)/$(path)/README) + +$(dist-win): $(dist-common) + $(call install-data,$(src_base)/README,$(dist_prefix)/$(path)/README.txt) + $(call message,,unix2dos $(dist_prefix)/$(path)/README.txt) + + +# Clean. 
+# +$(clean): files := $(out_base)/$(xsd:.xsd=-schema.?xx) +$(clean): $(driver).o.clean $(xsdbin).o.clean \ + $(addsuffix .cxx.clean,$(obj)) \ + $(addsuffix .cxx.clean,$(obj2)) \ + $(addsuffix .cxx.clean,$(dep)) \ + $(addprefix $(out_base)/,$(xsd:.xsd=.cxx.xsd.clean)) + $(call message,rm '$(out_base)/*-schema.?xx',rm -f $(files)) + +# Generated .gitignore. +# +ifeq ($(out_base),$(src_base)) +$(gen): | $(out_base)/.gitignore +$(driver): | $(out_base)/.gitignore + +$(out_base)/.gitignore: files := driver xsdbin $(genf) +$(clean): $(out_base)/.gitignore.clean + +$(call include,$(bld_root)/git/gitignore.make) +endif + +# How to. +# +$(call include,$(bld_root)/cxx/o-e.make) +$(call include,$(bld_root)/cxx/cxx-o.make) +$(call include,$(bld_root)/cxx/cxx-d.make) +$(call include,$(bld_root)/install.make) +$(call include,$(scf_root)/xsd/tree/xsd-cxx.make) + +# Dependencies. +# +$(call import,$(src_root)/xsd/makefile) diff --git a/examples/cxx/tree/embedded/xsdbin.cxx b/examples/cxx/tree/embedded/xsdbin.cxx new file mode 100644 index 0000000..53e2533 --- /dev/null +++ b/examples/cxx/tree/embedded/xsdbin.cxx @@ -0,0 +1,505 @@ +// file : examples/cxx/tree/embedded/xsdbin.cxx +// author : Boris Kolpackov +// copyright : not copyrighted - public domain + +// This program loads the XML Schema file(s) and converts them to +// the Xerces-C++ binary schema format which can then be embedded +// into C++ programs and used to validate XML documents. The output +// is written as a pair of C++ files (header and source) containing +// the array with the binary data. 
+// + +#include +#include // std::auto_ptr +#include // std::size_t +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#if _XERCES_VERSION >= 30000 +# include +#else +# include +#endif + +using namespace std; +using namespace xercesc; + +class error_handler: public ErrorHandler +{ +public: + error_handler () + : failed_ (false) + { + } + + bool + failed () const + { + return failed_; + } + + enum severity {s_warning, s_error, s_fatal}; + + virtual void + warning (const SAXParseException&); + + virtual void + error (const SAXParseException&); + + virtual void + fatalError (const SAXParseException&); + + virtual void + resetErrors () + { + failed_ = false; + } + + void + handle (const SAXParseException&, severity); + +private: + bool failed_; +}; + +void +cxx_escape (string&); + +int +main (int argc, char* argv[]) +{ + const char* hxx_suffix = "-schema.hxx"; + const char* cxx_suffix = "-schema.cxx"; + + string name; + string base; + string outdir; + + class usage {}; + + int argi (1); + bool help (false); + bool multi_import (true); + bool verbose (false); + + try + { + for (; argi < argc; ++argi) + { + string a (argv[argi]); + + if (a == "--help") + { + help = true; + throw usage (); + } + else if (a == "--verbose") + { + verbose = true; + } + else if (a == "--hxx-suffix") + { + if (++argi >= argc) + throw usage (); + + hxx_suffix = argv[argi]; + } + else if (a == "--cxx-suffix") + { + if (++argi >= argc) + throw usage (); + + cxx_suffix = argv[argi]; + } + else if (a == "--output-dir") + { + if (++argi >= argc) + throw usage (); + + outdir = argv[argi]; + } + else if (a == "--array-name") + { + if (++argi >= argc) + throw usage (); + + name = argv[argi]; + } + else if (a == "--disable-multi-import") + { + multi_import = false; + } + else + break; + } + + if (argi >= argc) + { + cerr << "no input file specified" << endl; + throw usage (); + } + + base = argv[argi]; + } + catch (usage 
const&) + { + cerr << "Usage: " << argv[0] << " [options] " << endl + << "Options:" << endl + << " --help Print usage information and exit." << endl + << " --verbose Print progress information." << endl + << " --output-dir Write generated files to ." << endl + << " --hxx-suffix Header file suffix instead of '-schema.hxx'." << endl + << " --cxx-suffix Source file suffix instead of '-schema.cxx'." << endl + << " --array-name Binary data array name." << endl + << " --disable-multi-import Disable multiple import support." << endl + << endl; + + return help ? 0 : 1; + } + + XMLPlatformUtils::Initialize (); + + { + MemoryManager* mm (XMLPlatformUtils::fgMemoryManager); + + auto_ptr gp (new XMLGrammarPoolImpl (mm)); + + // Load the schemas into grammar pool. + // + { + auto_ptr parser ( + XMLReaderFactory::createXMLReader (mm, gp.get ())); + + parser->setFeature (XMLUni::fgSAX2CoreNameSpaces, true); + parser->setFeature (XMLUni::fgSAX2CoreNameSpacePrefixes, true); + parser->setFeature (XMLUni::fgSAX2CoreValidation, true); + parser->setFeature (XMLUni::fgXercesSchema, true); + parser->setFeature (XMLUni::fgXercesSchemaFullChecking, true); + parser->setFeature (XMLUni::fgXercesValidationErrorAsFatal, true); + + // Xerces-C++ 3.1.0 is the first version with working multi import + // support. + // +#if _XERCES_VERSION >= 30100 + parser->setFeature (XMLUni::fgXercesHandleMultipleImports, multi_import); +#endif + + error_handler eh; + parser->setErrorHandler (&eh); + + for (; argi < argc; ++argi) + { + if (verbose) + cerr << "loading " << argv[argi] << endl; + + if (!parser->loadGrammar (argv[argi], Grammar::SchemaGrammarType, true)) + { + cerr << argv[argi] << ": error: unable to load" << endl; + return 1; + } + + if (eh.failed ()) + return 1; + } + } + + // Get the binary representation. 
+ // + BinMemOutputStream data; + + try + { + gp->serializeGrammars (&data); + } + catch (const XSerializationException& e) + { + char* msg (XMLString::transcode (e.getMessage ())); + cerr << "error: " << msg << endl; + XMLString::release (&msg); + return 1; + } + + size_t n (static_cast (data.curPos ())); + const unsigned char* buf ( + static_cast (data.getRawBuffer ())); + + if (verbose) + cerr << "uncomressed data size " << n << " bytes" << endl; + + // Compress zeros. + // + size_t cn (0); + unsigned char* cbuf = new unsigned char[n]; + + size_t cseq (0); // Number of bytes left in a compression sequence. + bool alt (false); // Alternating or sequential sequence. + + for (size_t i (0); i < n;) + { + unsigned char v (buf[i++]); + + // See if we are in a compression sequence. + // + if (cseq != 0) + { + // See if this byte needs to be copied. + // + if (alt && cseq % 2 == 0) + cbuf[cn++] = v; + + cseq--; + continue; + } + + // If we are not in a compression sequence and this byte is + // not zero then simply copy it. + // + if (v != 0) + { + cbuf[cn++] = v; + continue; + } + + // We have a zero. + // + cbuf[cn++] = 0; + + // See if we can start a new compression sequence. + // + if (i < n) + { + if (buf[i] == 0) + { + // Sequential sequence. See how far it runs. + // + alt = false; + + for (cseq = 1; cseq < 127 && cseq + i < n; cseq++) + if (buf[cseq + i] != 0) + break; + } + else if (i + 1 < n && buf[i + 1] == 0) + { + // Alternating sequence. See how far it runs. + // + alt = true; + + for (cseq = 1; cseq < 127 && cseq * 2 + i + 1 < n; cseq++) + { + if (buf[cseq * 2 + i + 1] != 0) + break; + + // For longer sequences prefer sequential to alternating. + // + if (cseq > 2 && + buf[cseq * 2 + i] == 0 && + buf[(cseq - 1) * 2 + i] == 0 && + buf[(cseq - 2) * 2 + i] == 0) + { + cseq -= 2; + break; + } + } + + cseq *= 2; + } + } + + if (cseq != 0) + { + cbuf[cn++] = static_cast ( + alt ? 
(128 | cseq / 2) : cseq); + } + else + cbuf[cn++] = 0; + } + + if (verbose) + cerr << "comressed data size " << cn << " bytes" << endl; + + buf = cbuf; + n = cn; + + // Figure out the file names. + // + string::size_type p (base.rfind ('/')), p1 (base.rfind ('\\')); + + if (p1 != string::npos && p1 > p) + p = p1; + + if (p != string::npos) + base = string (base, p + 1); + + p = base.rfind ('.'); + + if (p != string::npos) + base.resize (p); + + string hxx (base + hxx_suffix); + string cxx (base + cxx_suffix); + + if (!outdir.empty ()) + { +#if defined (WIN32) || defined (__WIN32__) + hxx = outdir + '\\' + hxx; + cxx = outdir + '\\' + cxx; +#else + hxx = outdir + '/' + hxx; + cxx = outdir + '/' + cxx; +#endif + } + + if (name.empty ()) + { + name = base + "_schema"; + cxx_escape (name); + } + + // Write header. + // + { + ofstream os (hxx.c_str ()); + + if (!os.is_open ()) + { + cerr << hxx << ": error: unable to open" << endl; + return 1; + } + + os << "// Automatically generated. Do not edit." << endl + << "//" << endl + << endl + << "#include " << endl + << endl + << "extern const XMLByte " << name << "[" << n << "UL];" << endl; + } + + { + ofstream os (cxx.c_str ()); + + if (!os.is_open ()) + { + cerr << cxx << ": error: unable to open" << endl; + return 1; + } + + os << "// Automatically generated. Do not edit." << endl + << "//" << endl + << endl + << "#include " << endl + << "#include " << endl + << endl + << "#if XERCES_GRAMMAR_SERIALIZATION_LEVEL != " << + XERCES_GRAMMAR_SERIALIZATION_LEVEL << endl + << "# error incompatible Xerces-C++ version detected" << endl + << "#endif" << endl + << endl + << "extern const XMLByte " << name << "[" << n << "UL] =" << endl + << "{"; + + for (size_t i (0); i < n; ++i) + { + if (i != 0) + os << ','; + + os << (i % 12 == 0 ? 
"\n " : " ") << "0x"; + os.width (2); + os.fill ('0'); + os << hex << static_cast (buf[i]); + } + + os << endl + << "};" << endl + << endl; + } + + delete[] cbuf; + } + + XMLPlatformUtils::Terminate (); +} + +void +cxx_escape (string& s) +{ + for (string::size_type i (0); i < s.size (); ++i) + { + char& c (s[i]); + + if (i == 0) + { + if (!((c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + c == '_')) + c = '_'; + } + else + { + if (!((c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || + c == '_')) + c = '_'; + } + } +} + +void error_handler:: +warning (const SAXParseException& e) +{ + handle (e, s_warning); +} + +void error_handler:: +error (const SAXParseException& e) +{ + failed_ = true; + handle (e, s_error); +} + +void error_handler:: +fatalError (const SAXParseException& e) +{ + failed_ = true; + handle (e, s_fatal); +} + +void error_handler:: +handle (const SAXParseException& e, severity s) +{ + const XMLCh* xid (e.getPublicId ()); + + if (xid == 0) + xid = e.getSystemId (); + + char* id (XMLString::transcode (xid)); + char* msg (XMLString::transcode (e.getMessage ())); + + cerr << id << ":"; + +#if _XERCES_VERSION >= 30000 + cerr << e.getLineNumber () << ":" << e.getColumnNumber () << " "; +#else + XMLSSize_t l (e.getLineNumber ()); + XMLSSize_t c (e.getColumnNumber ()); + cerr << (l == -1 ? 0 : l) << ":" << (c == -1 ? 0 : c) << " "; +#endif + + cerr << (s == s_warning ? 
"warning: " : "error: ") << msg << endl; + + XMLString::release (&id); + XMLString::release (&msg); +} diff --git a/examples/cxx/tree/makefile b/examples/cxx/tree/makefile index 15bcfb2..03c8101 100644 --- a/examples/cxx/tree/makefile +++ b/examples/cxx/tree/makefile @@ -5,11 +5,11 @@ include $(dir $(lastword $(MAKEFILE_LIST)))../../../build/bootstrap.make -all_examples := binary caching custom hello library messaging mixed \ -multiroot performance polymorphism streaming wildcard dbxml xpath +all_examples := binary caching embedded custom hello library messaging \ +mixed multiroot performance polymorphism streaming wildcard dbxml xpath -build_examples := binary caching custom hello library messaging mixed \ -multiroot performance polymorphism streaming wildcard +build_examples := binary caching embedded custom hello library messaging \ +mixed multiroot performance polymorphism streaming wildcard ifeq ($(xsd_with_dbxml),y) build_examples += dbxml -- cgit v1.1