// file : xsd/xsd.cxx // author : Boris Kolpackov // copyright : Copyright (c) 2005-2010 Code Synthesis Tools CC // license : GNU GPL v2 + exceptions; see accompanying LICENSE file #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "../libxsd/xsd/cxx/version.hxx" using namespace Cult::Types; typedef Cult::Containers::Vector NarrowStrings; namespace SemanticGraph = XSDFrontend::SemanticGraph; namespace Transformations = XSDFrontend::Transformations; using std::wcerr; using std::endl; namespace CLI { using namespace Cult::CLI; typedef Char const Key[]; extern Key help = "help"; extern Key version = "version"; extern Key proprietary_license = "proprietary-license"; typedef Cult::CLI::Options < help, Boolean, version, Boolean, proprietary_license, Boolean > HelpOptions; struct HelpOptionsSpec: Cult::CLI::OptionsSpec {}; extern Key disable_warning = "disable-warning"; extern Key sloc_limit = "sloc-limit"; extern Key morph_anonymous = "morph-anonymous"; extern Key preserve_anonymous = "preserve-anonymous"; extern Key anonymous_regex = "anonymous-regex"; extern Key anonymous_regex_trace = "anonymous-regex-trace"; extern Key location_map = "location-map"; extern Key location_regex = "location-regex"; extern Key location_regex_trace = "location-regex-trace"; extern Key custom_literals = "custom-literals"; extern Key file_per_type = "file-per-type"; extern Key type_file_regex = "type-file-regex"; extern Key type_file_regex_trace = "type-file-regex-trace"; extern Key file_list = "file-list"; extern Key file_list_prologue = "file-list-prologue"; extern Key file_list_epilogue = "file-list-epilogue"; extern Key file_list_delim = "file-list-delim"; extern Key disable_multi_import = "disable-multi-import"; // Undocumented. extern Key disable_full_check = "disable-full-check"; // Undocumented. typedef Cult::CLI::Options < disable_warning, Cult::Containers::Vector, sloc_limit, UnsignedLong, morph_anonymous, Boolean, preserve_anonymous, Boolean, anonymous_regex, NarrowStrings, anonymous_regex_trace, Boolean, location_map, NarrowStrings, location_regex, NarrowStrings, location_regex_trace, Boolean, custom_literals, NarrowString, file_per_type, Boolean, type_file_regex, NarrowStrings, type_file_regex_trace, Boolean, file_list, NarrowString, file_list_prologue, NarrowString, file_list_epilogue, NarrowString, file_list_delim, NarrowString, disable_multi_import, Boolean, disable_full_check, Boolean > CommonOptions; struct CommonOptionsSpec: Cult::CLI::OptionsSpec {}; } // // struct LocationTranslator: XSDFrontend::LocationTranslator { struct Failed {}; LocationTranslator (NarrowStrings const& map, NarrowStrings const& regex, Boolean trace); virtual NarrowString translate (NarrowString const&); private: typedef Cult::Containers::Map Map; typedef BackendElements::Regex::Expression Regex; typedef BackendElements::Regex::Format RegexFormat; typedef Cult::Containers::Vector RegexVector; typedef Cult::Containers::Map Cache; Map map_; RegexVector regex_; Cache cache_; Boolean trace_; }; // // struct AnonymousNameTranslator: Transformations::AnonymousNameTranslator { struct Failed {}; AnonymousNameTranslator (NarrowStrings const& regex, Boolean trace); virtual WideString translate (WideString const& file, WideString const& ns, WideString const& name, WideString const& xpath); private: typedef BackendElements::Regex::Expression Regex; typedef BackendElements::Regex::Format RegexFormat; typedef Cult::Containers::Vector RegexVector; RegexVector regex_; Boolean trace_; }; // // struct TypeSchemaTranslator: Transformations::TypeSchemaTranslator { struct Failed {}; TypeSchemaTranslator (NarrowStrings const& regex, Boolean trace); virtual WideString translate (WideString const& ns, WideString const& name); private: typedef BackendElements::Regex::Expression Regex; typedef BackendElements::Regex::Format RegexFormat; typedef Cult::Containers::Vector RegexVector; RegexVector regex_; Boolean trace_; }; // // struct XercesInitializer { XercesInitializer () { xercesc::XMLPlatformUtils::Initialize (); } ~XercesInitializer () { xercesc::XMLPlatformUtils::Terminate (); } }; // Expand the \n escape sequence. // Void expand_nl (NarrowString& s); Int main (Int argc, Char* argv[]) { std::wostream& e (wcerr); Cult::Trace::Log::instance ().level (0); try { CLI::FileArguments args (argc, argv, "--options-file"); CLI::HelpOptions help_options ( CLI::parse (CLI::HelpOptionsSpec (), args, CLI::UnknownMode::stop)); NarrowString cmd; if (args.size () > 1) { cmd = args[1]; args.erase (1); } if (help_options.value () || cmd == "version") { e << "CodeSynthesis XSD XML Schema to C++ compiler " << XSD_STR_VERSION << endl << "Copyright (C) 2005-2010 Code Synthesis Tools CC" << endl; if (!help_options.value () && cmd == "version") { // Parse the options after the command to detect trailing // --proprietary-license. // help_options = CLI::parse ( CLI::HelpOptionsSpec (), args, CLI::UnknownMode::stop); } if (help_options.value ()) { e << "The compiler was invoked in the Proprietary License mode. You " << "should have\nreceived a proprietary license from Code Synthesis " << "Tools CC that entitles\nyou to use it in this mode." << endl; } else { e << "This is free software; see the source for copying conditions. " << "There is NO\nwarranty; not even for MERCHANTABILITY or FITNESS " << "FOR A PARTICULAR PURPOSE." << endl; } return 0; } if (help_options.value () || cmd == "help") { if (cmd == "help" && args.size () > 1) { NarrowString arg (args[1]); if (arg == "cxx-tree") { e << "Usage: " << args[0] << " cxx-tree [options] file [file ...]" << endl << "Options:" << endl; CXX::Tree::Generator::usage (); } else if (arg == "cxx-parser") { e << "Usage: " << args[0] << " cxx-parser [options] file [file ...]" << endl << "Options:" << endl; CXX::Parser::Generator::usage (); } else { e << "error: unknown command '" << arg.c_str () << "'" << endl << "info: try '" << args[0] << " help' for the list of commands" << endl; return 1; } ::CLI::Indent::Clip< ::CLI::OptionsUsage, WideChar> clip (e); // Disable warning option. // e << "--disable-warning " << endl << " Disable printing warning with id . If 'all'\n" << " is specified for the warning id then all warnings\n" << " are disabled." << endl; // Anonymous morphing options. // e << "--preserve-anonymous" << endl << " Preserve anonymous types. By default anonymous\n" << " types are automatically named with names derived\n" << " from the enclosing elements/attributes." << endl; e << "--anonymous-regex " << endl << " Add the provided regular expression to the list of\n" << " regular expressions used to derive names for\n" << " anonymous types from the names of the enclosing\n" << " attributes/elements." << endl; e << "--anonymous-regex-trace" << endl << " Trace the process of applying regular expressions\n" << " specified with the --anonymous-regex option." << endl; // Location mapping options. // e << "--location-map
    =" << endl << " Map the original schema location
      that is\n" << " specified in the XML Schema include or import\n" << " elements to new schema location . Repeat\n" << " this option to map more than one schema location." << endl; e << "--location-regex " << endl << " Add to the list of regular expressions\n" << " used to map schema locations that are specified\n" << " in the XML Schema include or import elements." << endl; e << "--location-regex-trace" << endl << " Trace the process of applying regular expressions\n" << " specified with the --location-regex option." << endl; // File-per-type compilation mode options. // e << "--file-per-type" << endl << " Generate a separate set of C++ files for each\n" << " type defined in XML Schema." << endl; e << "--type-file-regex " << endl << " Add the provided regular expression to the list of\n" << " regular expressions used to translate type names\n" << " to file names when the --type-per-file option is\n" << " specified." << endl; e << "--type-file-regex-trace" << endl << " Trace the process of applying regular expressions\n" << " specified with the --type-file-regex option." << endl; // File list options. // e << "--file-list " << endl << " Write a list of generated C++ files to ." << endl; e << "--file-list-prologue

      " << endl << " Insert

      at the beginning of the file list. All\n" << " occurrences of the \\n character sequence in

      \n" << " are replaced with new lines." << endl; e << "--file-list-prologue " << endl << " Insert at the end of the file list. All\n" << " occurrences of the \\n character sequence in \n" << " are replaced with new lines." << endl; e << "--file-list-delim " << endl << " Delimit file names written to the file list with\n" << " instead of new lines. All occurrences of the\n" << " \\n character sequence in are replaced with\n" << " new lines." << endl; } else { e << "Usage: " << args[0] << " ..." << endl << "Commands:" << endl; e << " help Print usage information and exit. Use\n" << " 'help ' for command-specific options." << endl; e << " version Print version and exit." << endl; e << " cxx-tree Generate the C++/Tree mapping." << endl; e << " cxx-parser Generate the C++/Parser mapping." << endl; } return 0; } if (cmd.empty ()) { e << "error: no command specified" << endl << "info: try '" << args[0] << " help' for usage information" << endl; return 1; } if (cmd != "cxx-tree" && cmd != "cxx-parser") { e << "error: unknown command '" << cmd.c_str () << "'" << endl << "info: try '" << args[0] << " help' for the list of commands" << endl; return 1; } // We need to parse command line options before we can get to // the arguments. // CLI::CommonOptionsSpec common_spec; common_spec.option ().default_value ("\n"); CLI::CommonOptions common_ops ( CLI::parse ( common_spec, args, CLI::UnknownMode::skip, CLI::UnknownMode::skip)); WarningSet disabled_w; { typedef Cult::Containers::Vector Warnings; Warnings const& w (common_ops.value ()); for (Warnings::ConstIterator i (w.begin ()); i != w.end (); ++i) disabled_w.insert (*i); } Boolean disabled_w_all (disabled_w.find ("all") != disabled_w.end ()); if (common_ops.value () && !disabled_w_all && disabled_w.find ("D001") == disabled_w.end ()) { e << "warning D001: the --morph-anonymous option is on by default and " << "no longer required" << endl; } Evptr tree_ops; Evptr parser_ops; Boolean show_sloc (false); if (cmd == "cxx-tree") { tree_ops = new CXX::Tree::CLI::Options ( CLI::parse (CXX::Tree::Generator::options_spec (), args)); tree_ops->value () = common_ops.value (); show_sloc = tree_ops->value (); } else if (cmd == "cxx-parser") { parser_ops = new CXX::Parser::CLI::Options ( CLI::parse (CXX::Parser::Generator::options_spec (), args)); show_sloc = parser_ops->value (); } if (args.size () < 2) { e << "error: no input file specified" << endl; return 1; } Boolean fpt (common_ops.value ()); if (cmd == "cxx-tree" || cmd == "cxx-parser") { Boolean gen (false), use (false); if (cmd == "cxx-tree") { gen = tree_ops->value (); use = tree_ops->value (); } else if (cmd == "cxx-parser") { gen = parser_ops->value (); use = parser_ops->value (); } // Things get complicated when we are compiling several schemas at // once (non-file-per-type mode) and use the --generate-xml-schema/ // --extern-xml-schema options. The only way we can figure out which // file corresponds to XML Schema is if the --extern-xml-schema option // is also present. So we are going to require it for this case, // especially since it generally makes sense. // if (!fpt) { if (args.size () > 2 && gen && !use) { e << "error: --extern-xml-schema is required when compiling more " << "than one schema and --generate-xml-schema is specified" << endl; return 1; } if (args.size () == 2 && gen && use) { e << "error: --generate-xml-schema and --extern-xml-schema are " << "mutually exclusive when compiling a single schema" << endl; return 1; } } else { // The --file-per-type and --generate-xml-schema options are // incompatible. It also makes sense to use --file-per-type // and --extern-xml-schema. // if (gen) { e << "error: --file-per-type and --generate-xml-schema are " << "incompatible" << endl << "info: use --generate-xml-schema in a separate invocation " << "of the compiler" << endl; return 1; } if (!use && !disabled_w_all && disabled_w.find ("D002") == disabled_w.end ()) { e << "warning D002: --extern-xml-schema is recommended when " << "--file-per-type is specified to reduce generated code size" << endl; } } } // // FileList file_list; AutoUnlinks unlinks; UnsignedLong sloc (0); LocationTranslator loc_translator ( common_ops.value (), common_ops.value (), common_ops.value ()); AnonymousNameTranslator anon_translator ( common_ops.value (), common_ops.value ()); // Load custom string literals, if any. // CXX::StringLiteralMap string_literal_map; if (NarrowString file = common_ops.value ()) { XercesInitializer xerces_init; if (!CXX::read_literal_map (file, string_literal_map)) { // Diagnostics has already been issued. // return 1; } } if (!fpt) { // File-per-schema compilation mode. // for (Size i (1); i < args.size (); ++i) { // Parse schema. // SemanticGraph::Path tu; try { tu = SemanticGraph::Path (args[i], boost::filesystem::native); } catch (SemanticGraph::InvalidPath const&) { e << "error: '" << args[i] << "' is not a valid " << "filesystem path" << endl; return 1; } XSDFrontend::Parser parser ( cmd != "cxx-tree", !common_ops.value (), !common_ops.value (), loc_translator, disabled_w); Evptr schema; if (cmd == "cxx-tree" || cmd == "cxx-parser") { // See if we are generating code for the XML Schema namespace. // We could be compiling several schemas at once in which case // handling of the --generate-xml-schema option gets tricky: we // will need to rely on the presence of the --extern-xml-schema // to tell us which (fake) schema file corresponds to XML Schema. // Boolean gen_xml_schema (false); if (cmd == "cxx-tree") { gen_xml_schema = tree_ops->value (); if (gen_xml_schema) { if (NarrowString name = tree_ops->value ()) { if (tu.native_file_string () != name) gen_xml_schema = false; } } } else if (cmd == "cxx-parser") { gen_xml_schema = parser_ops->value (); if (gen_xml_schema) { if (NarrowString name = parser_ops->value ()) { if (tu.native_file_string () != name) gen_xml_schema = false; } } } if (gen_xml_schema) schema = parser.xml_schema (tu); else schema = parser.parse (tu); } else schema = parser.parse (tu); // Morph anonymous types. // if (!common_ops.value ()) { try { Transformations::Anonymous trans (anon_translator); trans.transform (*schema, tu, true); } catch (Transformations::Anonymous::Failed const&) { return 1; // Diagnostic has already been issued. } } // Simplify the schema graph. // if (cmd == "cxx-parser") { Transformations::Simplifier trans; trans.transform (*schema, tu); } // Try to rearrange definitions so that there is no forward // inheritance. // try { Processing::Inheritance::Processor proc; proc.process (*schema, tu); } catch (Processing::Inheritance::Processor::Failed const&) { return 1; // Diagnostic has already been issued. } // Normalize and annotate complex content restrictions. // if (cmd == "cxx-parser") { try { Transformations::Restriction trans; trans.transform (*schema, tu); } catch (Transformations::Restriction::Failed const&) { return 1; // Diagnostic has already been issued. } } // Calculate cardinality. // { Processing::Cardinality::Processor proc; proc.process (*schema, tu); } // Generate mapping. // if (cmd == "cxx-tree") { try { sloc += CXX::Tree::Generator::generate ( *tree_ops, *schema, tu, string_literal_map, disabled_w, file_list, unlinks); } catch (CXX::Tree::Generator::Failed const&) { // Diagnostic has already been issued. // return 1; } } else if (cmd == "cxx-parser") { try { sloc += CXX::Parser::Generator::generate ( *parser_ops, *schema, tu, string_literal_map, true, disabled_w, file_list, unlinks); } catch (CXX::Parser::Generator::Failed const&) { // Diagnostic has already been issued. // return 1; } } } } else { // File-per-type compilation mode. // SemanticGraph::Paths paths; for (Size i (1); i < args.size (); ++i) { try { paths.push_back ( SemanticGraph::Path (args[i], boost::filesystem::native)); } catch (SemanticGraph::InvalidPath const&) { e << "error: '" << args[i] << "' is not a valid " << "filesystem path" << endl; return 1; } } if (cmd == "cxx-parser" && paths.size () > 1 && parser_ops->value ()) { e << "info: generating test driver for the first schema only: '" << paths[0] << "'" << endl; } XSDFrontend::Parser parser ( cmd != "cxx-tree", !common_ops.value (), !common_ops.value (), loc_translator, disabled_w); Evptr schema (parser.parse (paths)); // Morph anonymous types. // if (!common_ops.value ()) { try { Transformations::Anonymous trans (anon_translator); trans.transform (*schema, "", false); } catch (Transformations::Anonymous::Failed const&) { return 1; // Diagnostic has already been issued. } } // Simplify the schema graph. // if (cmd == "cxx-parser") { Transformations::Simplifier trans; trans.transform (*schema, ""); } // Normalize and annotate complex content restrictions. // if (cmd == "cxx-parser") { try { Transformations::Restriction trans; trans.transform (*schema, ""); } catch (Transformations::Restriction::Failed const&) { return 1; // Diagnostic has already been issued. } } // Calculate cardinality. // { Processing::Cardinality::Processor proc; proc.process (*schema, ""); } // Rearrange the graph so that each type is in a seperate // schema file. // typedef Cult::Containers::Vector Schemas; TypeSchemaTranslator type_translator ( common_ops.value (), common_ops.value ()); Transformations::SchemaPerType trans (type_translator); Schemas schemas (trans.transform (*schema)); // Generate code. // for (Schemas::Iterator b (schemas.begin ()), i (b), e (schemas.end ()); i != e; ++i) { SemanticGraph::Schema& s (**i); SemanticGraph::Path path (s.used_begin ()->path ()); if (cmd == "cxx-tree") { try { sloc += CXX::Tree::Generator::generate ( *tree_ops, s, path, string_literal_map, disabled_w, file_list, unlinks); } catch (CXX::Tree::Generator::Failed const&) { // Diagnostic has already been issued. // return 1; } } else if (cmd == "cxx-parser") { try { // Only generate driver for the first schema. // sloc += CXX::Parser::Generator::generate ( *parser_ops, s, path, string_literal_map, i == b, disabled_w, file_list, unlinks); } catch (CXX::Parser::Generator::Failed const&) { // Diagnostic has already been issued. // return 1; } } } } // See if we need to produce the file list. // if (NarrowString fl = common_ops.value ()) { typedef boost::filesystem::ofstream OutputFileStream; try { OutputFileStream ofs; SemanticGraph::Path path (fl); ofs.open (fl, std::ios_base::out); if (!ofs.is_open ()) { wcerr << path << ": error: unable to open in write mode" << endl; return 1; } NarrowString d (common_ops.value ()); expand_nl (d); if (NarrowString p = common_ops.value ()) { expand_nl (p); ofs << p; } for (FileList::Iterator i (file_list.begin ()), e (file_list.end ()); i != e;) { ofs << *i; if (++i != e) ofs << d; } if (NarrowString e = common_ops.value ()) { expand_nl (e); ofs << e; } } catch (SemanticGraph::InvalidPath const&) { wcerr << "error: '" << fl.c_str () << "' is not a valid " << "filesystem path" << endl; return 1; } } if (show_sloc) e << "total: " << sloc << endl; if (UnsignedLong sloc_limit = common_ops.value ()) { if (sloc_limit < sloc) { e << "error: SLOC limit of " << sloc_limit << " lines has been exceeded" << endl; return 1; } } unlinks.cancel (); return 0; } catch (LocationTranslator::Failed const&) { // Diagnostic has already been issued. } catch (AnonymousNameTranslator::Failed const&) { // Diagnostic has already been issued. } catch (TypeSchemaTranslator::Failed const&) { // Diagnostic has already been issued. } catch (Transformations::SchemaPerType::Failed const&) { // Diagnostic has already been issued. } catch (XSDFrontend::InvalidSchema const&) { // Diagnostic has already been issued. } catch (CLI::UnexpectedOption const& e) { wcerr << "error: unknown option '" << e.option ().c_str () << "'" << endl << "info: try '" << argv[0] << " help' for usage information" << endl; } catch (CLI::OptionFormat const& e) { wcerr << "error: value for option '" << e.option ().c_str () << "' is invalid or missing" << endl << "info: try '" << argv[0] << " help' for usage information" << endl; } catch (CLI::OptionFile const& e) { if (e.value ()) wcerr << "error: " << e.value ().c_str () << ": " << e.description ().c_str () << endl; else wcerr << "error: missing --options-file argument" << endl; } return 1; } // LocationTranslator // LocationTranslator:: LocationTranslator (NarrowStrings const& map, NarrowStrings const& regex, Boolean trace) : trace_ (trace) { // Map. // for (NarrowStrings::ConstIterator i (map.begin ()); i != map.end (); ++i) { // Split the string in two parts at the last '='. // Size pos (i->rfind ('=')); if (pos == NarrowString::npos) { wcerr << "error: invalid location map: '" << i->c_str () << "': delimiter ('=') not found" << endl; throw Failed (); } map_[NarrowString (*i, 0, pos)] = NarrowString (*i, pos + 1); } // Regex. // for (NarrowStrings::ConstIterator i (regex.begin ()); i != regex.end (); ++i) { try { regex_.push_back (Regex (*i)); } catch (RegexFormat const& e) { wcerr << "error: invalid location regex: '" << e.expression ().c_str () << "': " << e.description ().c_str () << endl; throw Failed (); } } } NarrowString LocationTranslator:: translate (NarrowString const& l) { // First check the cache. // Cache::ConstIterator ci (cache_.find (l)); if (ci != cache_.end ()) return ci->second; // Then check the direct map. // Map::ConstIterator mi (map_.find (l)); if (mi != map_.end ()) { cache_[l] = mi->second; return mi->second; } // Finally try regex. // if (trace_) wcerr << "location: '" << l.c_str () << "'" << endl; for (RegexVector::ReverseIterator i (regex_.rbegin ()); i != regex_.rend (); ++i) { if (trace_) wcerr << "try: '" << i->pattern () << "' : "; if (i->match (l)) { NarrowString r (i->merge (l)); if (trace_) wcerr << "'" << r.c_str () << "' : +" << endl; cache_[l] = r; return r; } if (trace_) wcerr << '-' << endl; } // No match - return the original location. // cache_[l] = l; return l; } // AnonymousNameTranslator // AnonymousNameTranslator:: AnonymousNameTranslator (NarrowStrings const& regex, Boolean trace) : trace_ (trace) { for (NarrowStrings::ConstIterator i (regex.begin ()); i != regex.end (); ++i) { try { regex_.push_back (Regex (*i)); } catch (RegexFormat const& e) { wcerr << "error: invalid anonymous type regex: '" << e.expression () << "': " << e.description () << endl; throw Failed (); } } } WideString AnonymousNameTranslator:: translate (WideString const& file, WideString const& ns, WideString const& name, WideString const& xpath) { if (regex_.empty ()) return name; WideString s (file + L' ' + ns + L' ' + xpath); if (trace_) wcerr << "anonymous type: '" << s << "'" << endl; for (RegexVector::ReverseIterator i (regex_.rbegin ()); i != regex_.rend (); ++i) { if (trace_) wcerr << "try: '" << i->pattern () << "' : "; if (i->match (s)) { WideString r (i->merge (s)); if (trace_) wcerr << "'" << r << "' : +" << endl; return r; } if (trace_) wcerr << '-' << endl; } // No match - return the name. // return name; } // TypeSchemaTranslator // TypeSchemaTranslator:: TypeSchemaTranslator (NarrowStrings const& regex, Boolean trace) : trace_ (trace) { for (NarrowStrings::ConstIterator i (regex.begin ()); i != regex.end (); ++i) { try { regex_.push_back (Regex (*i)); } catch (RegexFormat const& e) { wcerr << "error: invalid type file regex: '" << e.expression () << "': " << e.description () << endl; throw Failed (); } } } WideString TypeSchemaTranslator:: translate (WideString const& ns, WideString const& name) { if (regex_.empty ()) return name; WideString s (ns + L' ' + name); if (trace_) wcerr << "type: '" << s << "'" << endl; for (RegexVector::ReverseIterator i (regex_.rbegin ()); i != regex_.rend (); ++i) { if (trace_) wcerr << "try: '" << i->pattern () << "' : "; if (i->match (s)) { WideString r (i->merge (s)); if (trace_) wcerr << "'" << r << "' : +" << endl; return r; } if (trace_) wcerr << '-' << endl; } // No match - return the type name. // return name; } // // Void expand_nl (NarrowString& s) { for (Size i (0); i < s.size ();) { if (s[i] == '\\' && (i + 1) < s.size () && s[i + 1] == 'n') { NarrowString tmp (s, 0, i); tmp += '\n'; tmp.append (s.c_str () + i + 2); s = tmp; } else ++i; } }