From 741f84835e207e219eb3093eaad845c941ba2818 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Wed, 25 Nov 2009 17:15:57 +0200 Subject: New example that shows how to embed schema grammar into application --- examples/cxx/tree/embedded/xsdbin.cxx | 505 ++++++++++++++++++++++++++++++++++ 1 file changed, 505 insertions(+) create mode 100644 examples/cxx/tree/embedded/xsdbin.cxx (limited to 'examples/cxx/tree/embedded/xsdbin.cxx') diff --git a/examples/cxx/tree/embedded/xsdbin.cxx b/examples/cxx/tree/embedded/xsdbin.cxx new file mode 100644 index 0000000..53e2533 --- /dev/null +++ b/examples/cxx/tree/embedded/xsdbin.cxx @@ -0,0 +1,505 @@ +// file : examples/cxx/tree/embedded/xsdbin.cxx +// author : Boris Kolpackov +// copyright : not copyrighted - public domain + +// This program loads the XML Schema file(s) and converts them to +// the Xerces-C++ binary schema format which can then be embedded +// into C++ programs and used to validate XML documents. The output +// is written as a C++ source file containing the array with the +// binary data. +// + +#include +#include // std::auto_ptr +#include // std::size_t +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#if _XERCES_VERSION >= 30000 +# include +#else +# include +#endif + +using namespace std; +using namespace xercesc; + +class error_handler: public ErrorHandler +{ +public: + error_handler () + : failed_ (false) + { + } + + bool + failed () const + { + return failed_; + } + + enum severity {s_warning, s_error, s_fatal}; + + virtual void + warning (const SAXParseException&); + + virtual void + error (const SAXParseException&); + + virtual void + fatalError (const SAXParseException&); + + virtual void + resetErrors () + { + failed_ = false; + } + + void + handle (const SAXParseException&, severity); + +private: + bool failed_; +}; + +void +cxx_escape (string&); + +int +main (int argc, char* argv[]) +{ + const char* hxx_suffix = "-schema.hxx"; + const char* cxx_suffix = "-schema.cxx"; + + string name; + string base; + string outdir; + + class usage {}; + + int argi (1); + bool help (false); + bool multi_import (true); + bool verbose (false); + + try + { + for (; argi < argc; ++argi) + { + string a (argv[argi]); + + if (a == "--help") + { + help = true; + throw usage (); + } + else if (a == "--verbose") + { + verbose = true; + } + else if (a == "--hxx-suffix") + { + if (++argi >= argc) + throw usage (); + + hxx_suffix = argv[argi]; + } + else if (a == "--cxx-suffix") + { + if (++argi >= argc) + throw usage (); + + cxx_suffix = argv[argi]; + } + else if (a == "--output-dir") + { + if (++argi >= argc) + throw usage (); + + outdir = argv[argi]; + } + else if (a == "--array-name") + { + if (++argi >= argc) + throw usage (); + + name = argv[argi]; + } + else if (a == "--disable-multi-import") + { + multi_import = false; + } + else + break; + } + + if (argi >= argc) + { + cerr << "no input file specified" << endl; + throw usage (); + } + + base = argv[argi]; + } + catch (usage const&) + { + cerr << "Usage: " << argv[0] << " [options] " << endl + << "Options:" << endl + << " --help Print usage information and exit." << endl + << " --verbose Print progress information." << endl + << " --output-dir Write generated files to ." << endl + << " --hxx-suffix Header file suffix instead of '-schema.hxx'." << endl + << " --cxx-suffix Source file suffix instead of '-schema.cxx'." << endl + << " --array-name Binary data array name." << endl + << " --disable-multi-import Disable multiple import support." << endl + << endl; + + return help ? 0 : 1; + } + + XMLPlatformUtils::Initialize (); + + { + MemoryManager* mm (XMLPlatformUtils::fgMemoryManager); + + auto_ptr gp (new XMLGrammarPoolImpl (mm)); + + // Load the schemas into grammar pool. + // + { + auto_ptr parser ( + XMLReaderFactory::createXMLReader (mm, gp.get ())); + + parser->setFeature (XMLUni::fgSAX2CoreNameSpaces, true); + parser->setFeature (XMLUni::fgSAX2CoreNameSpacePrefixes, true); + parser->setFeature (XMLUni::fgSAX2CoreValidation, true); + parser->setFeature (XMLUni::fgXercesSchema, true); + parser->setFeature (XMLUni::fgXercesSchemaFullChecking, true); + parser->setFeature (XMLUni::fgXercesValidationErrorAsFatal, true); + + // Xerces-C++ 3.1.0 is the first version with working multi import + // support. + // +#if _XERCES_VERSION >= 30100 + parser->setFeature (XMLUni::fgXercesHandleMultipleImports, multi_import); +#endif + + error_handler eh; + parser->setErrorHandler (&eh); + + for (; argi < argc; ++argi) + { + if (verbose) + cerr << "loading " << argv[argi] << endl; + + if (!parser->loadGrammar (argv[argi], Grammar::SchemaGrammarType, true)) + { + cerr << argv[argi] << ": error: unable to load" << endl; + return 1; + } + + if (eh.failed ()) + return 1; + } + } + + // Get the binary representation. + // + BinMemOutputStream data; + + try + { + gp->serializeGrammars (&data); + } + catch (const XSerializationException& e) + { + char* msg (XMLString::transcode (e.getMessage ())); + cerr << "error: " << msg << endl; + XMLString::release (&msg); + return 1; + } + + size_t n (static_cast (data.curPos ())); + const unsigned char* buf ( + static_cast (data.getRawBuffer ())); + + if (verbose) + cerr << "uncomressed data size " << n << " bytes" << endl; + + // Compress zeros. + // + size_t cn (0); + unsigned char* cbuf = new unsigned char[n]; + + size_t cseq (0); // Number of bytes left in a compression sequence. + bool alt (false); // Alternating or sequential sequence. + + for (size_t i (0); i < n;) + { + unsigned char v (buf[i++]); + + // See if we are in a compression sequence. + // + if (cseq != 0) + { + // See if this byte needs to be copied. + // + if (alt && cseq % 2 == 0) + cbuf[cn++] = v; + + cseq--; + continue; + } + + // If we are not in a compression sequence and this byte is + // not zero then simply copy it. + // + if (v != 0) + { + cbuf[cn++] = v; + continue; + } + + // We have a zero. + // + cbuf[cn++] = 0; + + // See if we can start a new compression sequence. + // + if (i < n) + { + if (buf[i] == 0) + { + // Sequential sequence. See how far it runs. + // + alt = false; + + for (cseq = 1; cseq < 127 && cseq + i < n; cseq++) + if (buf[cseq + i] != 0) + break; + } + else if (i + 1 < n && buf[i + 1] == 0) + { + // Alternating sequence. See how far it runs. + // + alt = true; + + for (cseq = 1; cseq < 127 && cseq * 2 + i + 1 < n; cseq++) + { + if (buf[cseq * 2 + i + 1] != 0) + break; + + // For longer sequences prefer sequential to alternating. + // + if (cseq > 2 && + buf[cseq * 2 + i] == 0 && + buf[(cseq - 1) * 2 + i] == 0 && + buf[(cseq - 2) * 2 + i] == 0) + { + cseq -= 2; + break; + } + } + + cseq *= 2; + } + } + + if (cseq != 0) + { + cbuf[cn++] = static_cast ( + alt ? (128 | cseq / 2) : cseq); + } + else + cbuf[cn++] = 0; + } + + if (verbose) + cerr << "comressed data size " << cn << " bytes" << endl; + + buf = cbuf; + n = cn; + + // Figure out the file names. + // + string::size_type p (base.rfind ('/')), p1 (base.rfind ('\\')); + + if (p1 != string::npos && p1 > p) + p = p1; + + if (p != string::npos) + base = string (base, p + 1); + + p = base.rfind ('.'); + + if (p != string::npos) + base.resize (p); + + string hxx (base + hxx_suffix); + string cxx (base + cxx_suffix); + + if (!outdir.empty ()) + { +#if defined (WIN32) || defined (__WIN32__) + hxx = outdir + '\\' + hxx; + cxx = outdir + '\\' + cxx; +#else + hxx = outdir + '/' + hxx; + cxx = outdir + '/' + cxx; +#endif + } + + if (name.empty ()) + { + name = base + "_schema"; + cxx_escape (name); + } + + // Write header. + // + { + ofstream os (hxx.c_str ()); + + if (!os.is_open ()) + { + cerr << hxx << ": error: unable to open" << endl; + return 1; + } + + os << "// Automatically generated. Do not edit." << endl + << "//" << endl + << endl + << "#include " << endl + << endl + << "extern const XMLByte " << name << "[" << n << "UL];" << endl; + } + + { + ofstream os (cxx.c_str ()); + + if (!os.is_open ()) + { + cerr << cxx << ": error: unable to open" << endl; + return 1; + } + + os << "// Automatically generated. Do not edit." << endl + << "//" << endl + << endl + << "#include " << endl + << "#include " << endl + << endl + << "#if XERCES_GRAMMAR_SERIALIZATION_LEVEL != " << + XERCES_GRAMMAR_SERIALIZATION_LEVEL << endl + << "# error incompatible Xerces-C++ version detected" << endl + << "#endif" << endl + << endl + << "extern const XMLByte " << name << "[" << n << "UL] =" << endl + << "{"; + + for (size_t i (0); i < n; ++i) + { + if (i != 0) + os << ','; + + os << (i % 12 == 0 ? "\n " : " ") << "0x"; + os.width (2); + os.fill ('0'); + os << hex << static_cast (buf[i]); + } + + os << endl + << "};" << endl + << endl; + } + + delete[] cbuf; + } + + XMLPlatformUtils::Terminate (); +} + +void +cxx_escape (string& s) +{ + for (string::size_type i (0); i < s.size (); ++i) + { + char& c (s[i]); + + if (i == 0) + { + if (!((c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + c == '_')) + c = '_'; + } + else + { + if (!((c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || + c == '_')) + c = '_'; + } + } +} + +void error_handler:: +warning (const SAXParseException& e) +{ + handle (e, s_warning); +} + +void error_handler:: +error (const SAXParseException& e) +{ + failed_ = true; + handle (e, s_error); +} + +void error_handler:: +fatalError (const SAXParseException& e) +{ + failed_ = true; + handle (e, s_fatal); +} + +void error_handler:: +handle (const SAXParseException& e, severity s) +{ + const XMLCh* xid (e.getPublicId ()); + + if (xid == 0) + xid = e.getSystemId (); + + char* id (XMLString::transcode (xid)); + char* msg (XMLString::transcode (e.getMessage ())); + + cerr << id << ":"; + +#if _XERCES_VERSION >= 30000 + cerr << e.getLineNumber () << ":" << e.getColumnNumber () << " "; +#else + XMLSSize_t l (e.getLineNumber ()); + XMLSSize_t c (e.getColumnNumber ()); + cerr << (l == -1 ? 0 : l) << ":" << (c == -1 ? 0 : c) << " "; +#endif + + cerr << (s == s_warning ? "warning: " : "error: ") << msg << endl; + + XMLString::release (&id); + XMLString::release (&msg); +} -- cgit v1.1