// file : examples/cxx/tree/embedded/xsdbin.cxx // copyright : not copyrighted - public domain // This program loads the XML Schema file(s) and converts them to // the Xerces-C++ binary schema format which can then be embedded // into C++ programs and used to validate XML documents. The output // is written as a C++ source file containing the array with the // binary data. // #include #include // std::auto_ptr #include // std::size_t #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace std; using namespace xercesc; class error_handler: public ErrorHandler { public: error_handler () : failed_ (false) { } bool failed () const { return failed_; } enum severity {s_warning, s_error, s_fatal}; virtual void warning (const SAXParseException&); virtual void error (const SAXParseException&); virtual void fatalError (const SAXParseException&); virtual void resetErrors () { failed_ = false; } void handle (const SAXParseException&, severity); private: bool failed_; }; void cxx_escape (string&); int main (int argc, char* argv[]) { const char* hxx_suffix = "-schema.hxx"; const char* cxx_suffix = "-schema.cxx"; string name; string base; string outdir; struct usage { usage (bool e = true): error (e) {} bool error; }; int argi (1); bool multi_import (true); bool verbose (false); try { for (; argi < argc; ++argi) { string a (argv[argi]); if (a == "--help") throw usage (false); else if (a == "--verbose") { verbose = true; } else if (a == "--hxx-suffix") { if (++argi >= argc) throw usage (); hxx_suffix = argv[argi]; } else if (a == "--cxx-suffix") { if (++argi >= argc) throw usage (); cxx_suffix = argv[argi]; } else if (a == "--output-dir") { if (++argi >= argc) throw usage (); outdir = argv[argi]; } else if (a == "--array-name") { if (++argi >= argc) throw usage (); name = argv[argi]; } else if (a == "--disable-multi-import") { multi_import = false; } else break; } if (argi >= argc) { cerr << "no input file specified" << endl; throw usage (); } base = argv[argi]; } catch (usage const& e) { ostream& o (e.error ? cerr : cout); o << "Usage: " << argv[0] << " [options] " << endl << "Options:" << endl << " --help Print usage information and exit." << endl << " --verbose Print progress information." << endl << " --output-dir Write generated files to ." << endl << " --hxx-suffix Header file suffix instead of '-schema.hxx'." << endl << " --cxx-suffix Source file suffix instead of '-schema.cxx'." << endl << " --array-name Binary data array name." << endl << " --disable-multi-import Disable multiple import support." << endl << endl; return e.error ? 0 : 1; } XMLPlatformUtils::Initialize (); { MemoryManager* mm (XMLPlatformUtils::fgMemoryManager); auto_ptr gp (new XMLGrammarPoolImpl (mm)); // Load the schemas into grammar pool. // { auto_ptr parser ( XMLReaderFactory::createXMLReader (mm, gp.get ())); parser->setFeature (XMLUni::fgSAX2CoreNameSpaces, true); parser->setFeature (XMLUni::fgSAX2CoreNameSpacePrefixes, true); parser->setFeature (XMLUni::fgSAX2CoreValidation, true); parser->setFeature (XMLUni::fgXercesSchema, true); parser->setFeature (XMLUni::fgXercesSchemaFullChecking, true); parser->setFeature (XMLUni::fgXercesValidationErrorAsFatal, true); // Xerces-C++ 3.1.0 is the first version with working multi import // support. // #if _XERCES_VERSION >= 30100 parser->setFeature (XMLUni::fgXercesHandleMultipleImports, multi_import); #endif error_handler eh; parser->setErrorHandler (&eh); for (; argi < argc; ++argi) { if (verbose) cerr << "loading " << argv[argi] << endl; if (!parser->loadGrammar (argv[argi], Grammar::SchemaGrammarType, true)) { cerr << argv[argi] << ": error: unable to load" << endl; return 1; } if (eh.failed ()) return 1; } } // Get the binary representation. // BinMemOutputStream data; try { gp->serializeGrammars (&data); } catch (const XSerializationException& e) { char* msg (XMLString::transcode (e.getMessage ())); cerr << "error: " << msg << endl; XMLString::release (&msg); return 1; } size_t n (static_cast (data.curPos ())); const unsigned char* buf ( static_cast (data.getRawBuffer ())); if (verbose) cerr << "uncomressed data size " << n << " bytes" << endl; // Compress zeros. // size_t cn (0); unsigned char* cbuf = new unsigned char[n]; size_t cseq (0); // Number of bytes left in a compression sequence. bool alt (false); // Alternating or sequential sequence. for (size_t i (0); i < n;) { unsigned char v (buf[i++]); // See if we are in a compression sequence. // if (cseq != 0) { // See if this byte needs to be copied. // if (alt && cseq % 2 == 0) cbuf[cn++] = v; cseq--; continue; } // If we are not in a compression sequence and this byte is // not zero then simply copy it. // if (v != 0) { cbuf[cn++] = v; continue; } // We have a zero. // cbuf[cn++] = 0; // See if we can start a new compression sequence. // if (i < n) { if (buf[i] == 0) { // Sequential sequence. See how far it runs. // alt = false; for (cseq = 1; cseq < 127 && cseq + i < n; cseq++) if (buf[cseq + i] != 0) break; } else if (i + 1 < n && buf[i + 1] == 0) { // Alternating sequence. See how far it runs. // alt = true; for (cseq = 1; cseq < 127 && cseq * 2 + i + 1 < n; cseq++) { if (buf[cseq * 2 + i + 1] != 0) break; // For longer sequences prefer sequential to alternating. // if (cseq > 2 && buf[cseq * 2 + i] == 0 && buf[(cseq - 1) * 2 + i] == 0 && buf[(cseq - 2) * 2 + i] == 0) { cseq -= 2; break; } } cseq *= 2; } } if (cseq != 0) { cbuf[cn++] = static_cast ( alt ? (128 | cseq / 2) : cseq); } else cbuf[cn++] = 0; } if (verbose) cerr << "comressed data size " << cn << " bytes" << endl; buf = cbuf; n = cn; // Figure out the file names. // string::size_type p (base.rfind ('/')), p1 (base.rfind ('\\')); if (p1 != string::npos && p1 > p) p = p1; if (p != string::npos) base = string (base, p + 1); p = base.rfind ('.'); if (p != string::npos) base.resize (p); string hxx (base + hxx_suffix); string cxx (base + cxx_suffix); if (!outdir.empty ()) { #if defined (WIN32) || defined (__WIN32__) hxx = outdir + '\\' + hxx; cxx = outdir + '\\' + cxx; #else hxx = outdir + '/' + hxx; cxx = outdir + '/' + cxx; #endif } if (name.empty ()) { name = base + "_schema"; cxx_escape (name); } // Write header. // { ofstream os (hxx.c_str ()); if (!os.is_open ()) { cerr << hxx << ": error: unable to open" << endl; return 1; } os << "// Automatically generated. Do not edit." << endl << "//" << endl << endl << "#include " << endl << endl << "extern const XMLByte " << name << "[" << n << "UL];" << endl; } { ofstream os (cxx.c_str ()); if (!os.is_open ()) { cerr << cxx << ": error: unable to open" << endl; return 1; } os << "// Automatically generated. Do not edit." << endl << "//" << endl << endl << "#include " << endl << "#include " << endl << endl << "#if XERCES_GRAMMAR_SERIALIZATION_LEVEL != " << XERCES_GRAMMAR_SERIALIZATION_LEVEL << endl << "# error incompatible Xerces-C++ version detected" << endl << "#endif" << endl << endl << "extern const XMLByte " << name << "[" << n << "UL] =" << endl << "{"; for (size_t i (0); i < n; ++i) { if (i != 0) os << ','; os << (i % 12 == 0 ? "\n " : " ") << "0x"; os.width (2); os.fill ('0'); os << hex << static_cast (buf[i]); } os << endl << "};" << endl << endl; } delete[] cbuf; } XMLPlatformUtils::Terminate (); } void cxx_escape (string& s) { for (string::size_type i (0); i < s.size (); ++i) { char& c (s[i]); if (i == 0) { if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_')) c = '_'; } else { if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_')) c = '_'; } } } void error_handler:: warning (const SAXParseException& e) { handle (e, s_warning); } void error_handler:: error (const SAXParseException& e) { failed_ = true; handle (e, s_error); } void error_handler:: fatalError (const SAXParseException& e) { failed_ = true; handle (e, s_fatal); } void error_handler:: handle (const SAXParseException& e, severity s) { const XMLCh* xid (e.getPublicId ()); if (xid == 0) xid = e.getSystemId (); char* id (XMLString::transcode (xid)); char* msg (XMLString::transcode (e.getMessage ())); cerr << id << ":" << e.getLineNumber () << ":" << e.getColumnNumber () << " " << (s == s_warning ? "warning: " : "error: ") << msg << endl; XMLString::release (&id); XMLString::release (&msg); }