From 1ca6396a3dd284241de11bcaa210ad5836e8e5a8 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Tue, 8 Dec 2009 16:18:01 +0200 Subject: Multiple object model character encodings support Also add support for ISO-8859-1. --- xsd/cxx/elements.cxx | 275 ++++++++++++++++++++++++++++++++++- xsd/cxx/elements.hxx | 39 ++++- xsd/cxx/literal-map.cxx | 296 ++++++++++++++++++++++++++++++++++++++ xsd/cxx/literal-map.hxx | 23 +++ xsd/cxx/parser/cli.hxx | 2 + xsd/cxx/parser/elements.cxx | 3 + xsd/cxx/parser/elements.hxx | 1 + xsd/cxx/parser/generator.cxx | 82 +++++++++-- xsd/cxx/parser/generator.hxx | 2 + xsd/cxx/parser/name-processor.cxx | 16 ++- xsd/cxx/parser/name-processor.hxx | 6 +- xsd/cxx/parser/parser-header.cxx | 7 + xsd/cxx/parser/validator.cxx | 17 ++- xsd/cxx/tree/cli.hxx | 2 + xsd/cxx/tree/counter.cxx | 2 +- xsd/cxx/tree/elements.cxx | 3 + xsd/cxx/tree/elements.hxx | 1 + xsd/cxx/tree/generator.cxx | 73 ++++++++-- xsd/cxx/tree/generator.hxx | 2 + xsd/cxx/tree/name-processor.cxx | 15 +- xsd/cxx/tree/name-processor.hxx | 9 +- xsd/cxx/tree/tree-forward.cxx | 7 + xsd/cxx/tree/tree-header.cxx | 14 ++ xsd/cxx/tree/validator.cxx | 1 + xsd/elements.hxx | 1 - xsd/makefile | 20 ++- xsd/xsd.cxx | 70 ++++++++- 27 files changed, 926 insertions(+), 63 deletions(-) create mode 100644 xsd/cxx/literal-map.cxx create mode 100644 xsd/cxx/literal-map.hxx (limited to 'xsd') diff --git a/xsd/cxx/elements.cxx b/xsd/cxx/elements.cxx index fd23fc0..764d7da 100644 --- a/xsd/cxx/elements.cxx +++ b/xsd/cxx/elements.cxx @@ -8,7 +8,9 @@ #include #include // std::toupper +#include #include +#include #include using std::wcerr; @@ -111,7 +113,9 @@ namespace CXX Context:: Context (std::wostream& o, SemanticGraph::Schema& root, + StringLiteralMap const* string_literal_map_, NarrowString const& char_type__, + NarrowString const& char_encoding__, Boolean include_with_brackets__, NarrowString const& include_prefix__, NarrowString const& esymbol, @@ -125,8 +129,10 @@ namespace CXX : os (o), schema_root 
(root), char_type (char_type_), + char_encoding (char_encoding_), L (L_), string_type (string_type_), + string_literal_map (string_literal_map_), include_with_brackets (include_with_brackets_), include_prefix (include_prefix_), type_exp (type_exp_), @@ -135,6 +141,7 @@ namespace CXX ns_mapping_cache (ns_mapping_cache_), xs_ns_ (0), char_type_ (char_type__), + char_encoding_ (char_encoding__), L_ (char_type == L"wchar_t" ? L"L" : L""), include_with_brackets_ (include_with_brackets__), include_prefix_ (include_prefix__), @@ -177,7 +184,7 @@ namespace CXX xs_ns_ = dynamic_cast (n); } - // + // String type. // if (char_type == L"char") string_type_ = L"::std::string"; @@ -186,6 +193,16 @@ namespace CXX else string_type_ = L"::std::basic_string< " + char_type + L" >"; + // Default encoding. + // + if (!char_encoding) + { + if (char_type == L"char") + char_encoding = L"utf8"; + else + char_encoding = L"auto"; + } + // Default mapping. // nsr_mapping_.push_back ( @@ -615,6 +632,121 @@ namespace CXX return r; } + String + strlit_ascii (String const& str) + { + String r; + Size n (str.size ()); + + // In most common cases we will have that many chars. + // + r.reserve (n + 2); + + r += '"'; + + Boolean escape (false); + + for (Size i (0); i < n; ++i) + { + UnsignedLong u (Context::unicode_char (str, i)); // May advance i. + + // [128 - ] - unrepresentable + // 127 - \x7F + // [32 - 126] - as is + // [0 - 31] - \X or \xXX + // + + if (u < 32 || u == 127) + { + switch (u) + { + case L'\n': + { + r += L"\\n"; + break; + } + case L'\t': + { + r += L"\\t"; + break; + } + case L'\v': + { + r += L"\\v"; + break; + } + case L'\b': + { + r += L"\\b"; + break; + } + case L'\r': + { + r += L"\\r"; + break; + } + case L'\f': + { + r += L"\\f"; + break; + } + case L'\a': + { + r += L"\\a"; + break; + } + default: + { + r += charlit (u); + escape = true; + break; + } + } + } + else if (u < 127) + { + if (escape) + { + // Close and open the string so there are no clashes. 
+ // + r += '"'; + r += '"'; + + escape = false; + } + + switch (u) + { + case L'"': + { + r += L"\\\""; + break; + } + case L'\\': + { + r += L"\\\\"; + break; + } + default: + { + r += static_cast (u); + break; + } + } + } + else + { + // Unrepresentable character. + // + throw UnrepresentableCharacter (str, i + 1); + } + } + + r += '"'; + + return r; + } + const UnsignedLong utf8_first_char_mask[5] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0 @@ -770,6 +902,126 @@ namespace CXX } String + strlit_iso8859_1 (String const& str) + { + String r; + Size n (str.size ()); + + // In most common cases we will have that many chars. + // + r.reserve (n + 2); + + r += '"'; + + Boolean escape (false); + + for (Size i (0); i < n; ++i) + { + UnsignedLong u (Context::unicode_char (str, i)); // May advance i. + + // [256 - ] - unrepresentable + // [127 - 255] - \xXX + // [32 - 126] - as is + // [0 - 31] - \X or \xXX + // + + if (u < 32) + { + switch (u) + { + case L'\n': + { + r += L"\\n"; + break; + } + case L'\t': + { + r += L"\\t"; + break; + } + case L'\v': + { + r += L"\\v"; + break; + } + case L'\b': + { + r += L"\\b"; + break; + } + case L'\r': + { + r += L"\\r"; + break; + } + case L'\f': + { + r += L"\\f"; + break; + } + case L'\a': + { + r += L"\\a"; + break; + } + default: + { + r += charlit (u); + escape = true; + break; + } + } + } + else if (u < 127) + { + if (escape) + { + // Close and open the string so there are no clashes. + // + r += '"'; + r += '"'; + + escape = false; + } + + switch (u) + { + case L'"': + { + r += L"\\\""; + break; + } + case L'\\': + { + r += L"\\\\"; + break; + } + default: + { + r += static_cast (u); + break; + } + } + } + else if (u < 256) + { + r += charlit (u); + escape = true; + } + else + { + // Unrepresentable character. 
+ // + throw UnrepresentableCharacter (str, i + 1); + } + } + + r += '"'; + + return r; + } + + String strlit_utf32 (String const& str) { String r; @@ -886,8 +1138,27 @@ namespace CXX String Context:: strlit (String const& str) { + // First see if we have a custom mapping. + // + assert (string_literal_map != 0); + StringLiteralMap::ConstIterator i (string_literal_map->find (str)); + + if (i != string_literal_map->end ()) + return i->second; + if (char_type == L"char") - return strlit_utf8 (str); + { + if (char_encoding == L"utf8") + return strlit_utf8 (str); + else if (char_encoding == L"iso8859-1") + return strlit_iso8859_1 (str); + else + { + // For LCP, custom, and other unknown encodings, use ASCII. + // + return strlit_ascii (str); + } + } else return strlit_utf32 (str); } diff --git a/xsd/cxx/elements.hxx b/xsd/cxx/elements.hxx index 39eee77..3bbacd0 100644 --- a/xsd/cxx/elements.hxx +++ b/xsd/cxx/elements.hxx @@ -6,6 +6,8 @@ #ifndef CXX_ELEMENTS_HXX #define CXX_ELEMENTS_HXX +#include + #include #include #include @@ -17,8 +19,7 @@ #include #include - -#include +#include namespace CXX { @@ -36,6 +37,30 @@ namespace CXX // Exceptions. 
// + struct UnrepresentableCharacter + { + UnrepresentableCharacter (String const& str, Size pos) + : str_ (str), pos_ (pos) + { + } + + String const& + string () const + { + return str_; + } + + Size + position () const + { + return pos_; + } + + private: + String str_; + Size pos_; + }; + struct NoNamespaceMapping { NoNamespaceMapping (SemanticGraph::Path const& file, @@ -106,7 +131,6 @@ namespace CXX String reason_; }; - // // class Context @@ -124,7 +148,9 @@ namespace CXX public: Context (std::wostream& o, SemanticGraph::Schema& root, + StringLiteralMap const* custom_literals_map, NarrowString const& char_type__, + NarrowString const& char_encoding__, Boolean include_with_brackets__, NarrowString const& include_prefix__, NarrowString const& esymbol, @@ -141,8 +167,10 @@ namespace CXX : os (c.os), schema_root (c.schema_root), char_type (c.char_type), + char_encoding (c.char_encoding), L (c.L), string_type (c.string_type), + string_literal_map (c.string_literal_map), include_with_brackets (c.include_with_brackets), include_prefix (c.include_prefix), type_exp (c.type_exp), @@ -166,8 +194,10 @@ namespace CXX : os (o), schema_root (c.schema_root), char_type (c.char_type), + char_encoding (c.char_encoding), L (c.L), string_type (c.string_type), + string_literal_map (c.string_literal_map), include_with_brackets (c.include_with_brackets), include_prefix (c.include_prefix), type_exp (c.type_exp), @@ -309,8 +339,10 @@ namespace CXX SemanticGraph::Schema& schema_root; String& char_type; + String& char_encoding; String& L; // string literal prefix String& string_type; + StringLiteralMap const* string_literal_map; Boolean& include_with_brackets; String& include_prefix; @@ -326,6 +358,7 @@ namespace CXX SemanticGraph::Namespace* xs_ns_; String char_type_; + String char_encoding_; String L_; String string_type_; diff --git a/xsd/cxx/literal-map.cxx b/xsd/cxx/literal-map.cxx new file mode 100644 index 0000000..f3f7ee0 --- /dev/null +++ b/xsd/cxx/literal-map.cxx @@ -0,0 +1,296 
@@ +// file : xsd/cxx/literal-map.cxx +// author : Boris Kolpackov +// copyright : Copyright (c) 2005-2009 Code Synthesis Tools CC +// license : GNU GPL v2 + exceptions; see accompanying LICENSE file + +#include // std::auto_ptr +#include // std::size_t +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include + +#include + +#include + +using namespace std; +using namespace xercesc; +namespace XML = XSDFrontend::XML; + +namespace CXX +{ + class Handler: public DefaultHandler + { + public: + struct Failed {}; + + Handler (String const& file, StringLiteralMap& map) + : state_ (s_init), file_ (file), map_ (map) + { + } + + virtual void + setDocumentLocator (const Locator* const l) + { + locator_ = l; + } + + virtual Void + startElement (const XMLCh* const, + const XMLCh* const lname, + const XMLCh* const, + const xercesc::Attributes&) + { + String n (XML::transcode (lname)); + + if (n == L"string-literal-map" && state_ == s_init) + state_ = s_map; + else if (n == L"entry" && state_ == s_map) + { + str_seen_ = false; + lit_seen_ = false; + state_ = s_entry; + } + else if (n == L"string" && state_ == s_entry) + { + str_seen_ = true; + str_.clear (); + state_ = s_string; + } + else if (n == L"literal" && state_ == s_entry) + { + lit_seen_ = true; + lit_.clear (); + state_ = s_literal; + } + else + { + wcerr << file_ << ":" << line () << ":" << col () << ": error: " + << "unexpected element '" << n << "'" << endl; + throw Failed (); + } + } + + virtual Void + endElement (const XMLCh* const, + const XMLCh* const lname, + const XMLCh* const) + { + String n (XML::transcode (lname)); + + if (n == L"string-literal-map") + state_ = s_init; + else if (n == L"entry") + { + if (!str_seen_) + { + wcerr << file_ << ":" << line () << ":" << col () << ": error: " + << "expected 'string' element" << endl; + throw Failed (); + } + + if (!lit_seen_) + { + wcerr << file_ << ":" << line () << ":" << col () << ": error: " + << "expected 
'literal' element" << endl; + throw Failed (); + } + + map_[str_] = lit_; + state_ = s_map; + } + else if (n == L"string") + state_ = s_entry; + else if (n == L"literal") + state_ = s_entry; + } + +#if _XERCES_VERSION >= 30000 + virtual Void + characters (const XMLCh* const s, const XMLSize_t length) +#else + virtual Void + characters (const XMLCh* const s, const unsigned int length) +#endif + { + String str (XML::transcode (s, length)); + + if (state_ == s_string) + str_ += str; + else if (state_ == s_literal) + lit_ += str; + else + { + for (Size i (0); i < str.size (); ++i) + { + WideChar c (str[i]); + + if (c != 0x20 && c != 0x0A && c != 0x0D && c != 0x09) + { + wcerr << file_ << ":" << line () << ":" << col () << ": error: " + << "unexpected character data" << endl; + throw Failed (); + } + } + } + } + + // Error handling. + // + enum Severity {s_warning, s_error, s_fatal}; + + virtual Void + warning (const SAXParseException& e) + { + handle (e, s_warning); + } + + virtual Void + error (const SAXParseException& e) + { + handle (e, s_error); + } + + virtual Void + fatalError (const SAXParseException& e) + { + handle (e, s_fatal); + } + + virtual Void + resetErrors () + { + } + + Void + handle (const SAXParseException& e, Severity s) + { + wcerr << file_ << ":"; + +#if _XERCES_VERSION >= 30000 + wcerr << e.getLineNumber () << ":" << e.getColumnNumber () << ": "; +#else + XMLSSize_t l (e.getLineNumber ()); + XMLSSize_t c (e.getColumnNumber ()); + wcerr << (l == -1 ? 0 : l) << ":" << (c == -1 ? 0 : c) << ": "; +#endif + + String msg (XML::transcode (e.getMessage ())); + wcerr << (s == s_warning ? "warning: " : "error: ") << msg << endl; + + if (s != s_warning) + throw Failed (); + } + + size_t + line () const + { + size_t r (0); + + if (locator_ != 0) + { +#if _XERCES_VERSION >= 30000 + r = static_cast (locator_->getLineNumber ()); +#else + XMLSSize_t l (locator_->getLineNumber ()); + r = l == -1 ?
0 : static_cast (l); +#endif + } + + return r; + } + + size_t + col () const + { + size_t r (0); + + if (locator_ != 0) + { +#if _XERCES_VERSION >= 30000 + r = static_cast (locator_->getColumnNumber ()); +#else + XMLSSize_t c (locator_->getColumnNumber ()); + r = c == -1 ? 0 : static_cast (c); +#endif + } + + return r; + } + + private: + const Locator* locator_; + + enum + { + s_init, + s_map, + s_entry, + s_string, + s_literal + } state_; + + String file_; + StringLiteralMap& map_; + + Boolean str_seen_; + Boolean lit_seen_; + + String str_; + String lit_; + }; + + bool + read_literal_map (NarrowString const& file, StringLiteralMap& map) + { + try + { + // Try to open the file with fstream. This way we get to + // report the error in a consistent manner. + // + { + ifstream ifs (file.c_str ()); + if (!ifs.is_open ()) + { + wcerr << file.c_str () << ": unable to open in read mode" << endl; + return false; + } + } + + String wfile (file); + + LocalFileInputSource is (XML::XMLChString (wfile).c_str ()); + Handler h (wfile, map); + + auto_ptr parser ( + XMLReaderFactory::createXMLReader ()); + + parser->setFeature (XMLUni::fgSAX2CoreNameSpaces, true); + parser->setFeature (XMLUni::fgSAX2CoreNameSpacePrefixes, true); + parser->setFeature (XMLUni::fgSAX2CoreValidation, false); + parser->setFeature (XMLUni::fgXercesSchema, false); + parser->setFeature (XMLUni::fgXercesSchemaFullChecking, false); + + parser->setErrorHandler (&h); + parser->setContentHandler (&h); + + parser->parse (is); + } + catch (Handler::Failed const&) + { + return false; + } + + return true; + } +} diff --git a/xsd/cxx/literal-map.hxx b/xsd/cxx/literal-map.hxx new file mode 100644 index 0000000..1120045 --- /dev/null +++ b/xsd/cxx/literal-map.hxx @@ -0,0 +1,23 @@ +// file : xsd/cxx/literal-map.hxx +// author : Boris Kolpackov +// copyright : Copyright (c) 2005-2009 Code Synthesis Tools CC +// license : GNU GPL v2 + exceptions; see accompanying LICENSE file + +#ifndef CXX_LITERAL_MAP_HXX +#define 
CXX_LITERAL_MAP_HXX + +#include +#include + +namespace CXX +{ + using namespace Cult; + typedef WideString String; + + typedef Cult::Containers::Map StringLiteralMap; + + bool + read_literal_map (NarrowString const& file, StringLiteralMap& map); +} + +#endif // CXX_LITERAL_MAP_HXX diff --git a/xsd/cxx/parser/cli.hxx b/xsd/cxx/parser/cli.hxx index 504de43..5f31af7 100644 --- a/xsd/cxx/parser/cli.hxx +++ b/xsd/cxx/parser/cli.hxx @@ -24,6 +24,7 @@ namespace CXX typedef Char const Key[]; extern Key type_map; + extern Key char_encoding; extern Key char_type; extern Key output_dir; extern Key xml_parser; @@ -85,6 +86,7 @@ namespace CXX typedef Cult::CLI::Options< type_map, Cult::Containers::Vector, char_type, NarrowString, + char_encoding, NarrowString, output_dir, NarrowString, xml_parser, NarrowString, generate_inline, Boolean, diff --git a/xsd/cxx/parser/elements.cxx b/xsd/cxx/parser/elements.cxx index 8a02ffb..09d1008 100644 --- a/xsd/cxx/parser/elements.cxx +++ b/xsd/cxx/parser/elements.cxx @@ -42,12 +42,15 @@ namespace CXX Context (std::wostream& o, SemanticGraph::Schema& root, CLI::Options const& ops, + StringLiteralMap const* map, Regex const* he, Regex const* ie, Regex const* hie) : CXX::Context (o, root, + map, ops.value (), + ops.value (), ops.value (), ops.value (), ops.value (), diff --git a/xsd/cxx/parser/elements.hxx b/xsd/cxx/parser/elements.hxx index 90ff84e..61cde69 100644 --- a/xsd/cxx/parser/elements.hxx +++ b/xsd/cxx/parser/elements.hxx @@ -39,6 +39,7 @@ namespace CXX Context (std::wostream&, SemanticGraph::Schema&, CLI::Options const&, + StringLiteralMap const*, Regex const* hxx_expr, Regex const* ixx_expr, Regex const* hxx_impl_expr); diff --git a/xsd/cxx/parser/generator.cxx b/xsd/cxx/parser/generator.cxx index 342e3f2..ec08af4 100644 --- a/xsd/cxx/parser/generator.cxx +++ b/xsd/cxx/parser/generator.cxx @@ -126,9 +126,9 @@ namespace CXX { namespace CLI { - extern Key char_type; extern Key type_map = "type-map"; extern Key char_type = "char-type"; 
+ extern Key char_encoding = "char-encoding"; extern Key output_dir = "output-dir"; extern Key xml_parser = "xml-parser"; extern Key generate_inline = "generate-inline"; @@ -206,6 +206,14 @@ namespace CXX << " values are 'char' (default) and 'wchar_t'." << endl; + e << "--char-encoding " << endl + << " Specify the character encoding that should be used\n" + << " in the object model. Valid values for the 'char'\n" + << " character type are 'utf8' (default), 'iso8859-1',\n" + << " 'lcp', and 'custom'. For the 'wchar_t' character\n" + << " type the only valid value is 'auto'." + << endl; + e << "--output-dir " << endl << " Write generated files to instead of current\n" << " directory." @@ -471,6 +479,11 @@ namespace CXX // Misc. // + e << "--custom-literals " << endl + << " Load custom XML string to C++ literal mappings\n" + << " from ." + << endl; + e << "--export-symbol " << endl << " Export symbol for Win32 DLL export/import control." << endl; @@ -600,6 +613,7 @@ namespace CXX generate (Parser::CLI::Options const& ops, Schema& schema, Path const& file_path, + StringLiteralMap const& string_literal_map, Boolean gen_driver, const WarningSet& disabled_warnings, FileList& file_list, @@ -648,7 +662,7 @@ namespace CXX // { NameProcessor proc; - proc.process (ops, schema, file_path); + proc.process (ops, schema, file_path, string_literal_map); } Boolean validation ((ops.value () == "expat" || @@ -701,7 +715,7 @@ namespace CXX String xns; { - Context ctx (std::wcerr, schema, ops, 0, 0, 0); + Context ctx (std::wcerr, schema, ops, 0, 0, 0, 0); xns = ctx.xs_ns_name (); } @@ -1144,7 +1158,13 @@ namespace CXX // HXX // { - Context ctx (hxx, schema, ops, &hxx_expr, &ixx_expr, &hxx_impl_expr); + Context ctx (hxx, + schema, + ops, + &string_literal_map, + &hxx_expr, + &ixx_expr, + &hxx_impl_expr); Indentation::Clip hxx_sloc (hxx); @@ -1231,7 +1251,13 @@ namespace CXX // if (inline_) { - Context ctx (ixx, schema, ops, &hxx_expr, &ixx_expr, &hxx_impl_expr); + Context ctx (ixx, + 
schema, + ops, + &string_literal_map, + &hxx_expr, + &ixx_expr, + &hxx_impl_expr); Indentation::Clip ixx_sloc (ixx); @@ -1287,7 +1313,13 @@ namespace CXX // if (source) { - Context ctx (cxx, schema, ops, &hxx_expr, &ixx_expr, &hxx_impl_expr); + Context ctx (cxx, + schema, + ops, + &string_literal_map, + &hxx_expr, + &ixx_expr, + &hxx_impl_expr); Indentation::Clip cxx_sloc (cxx); @@ -1351,8 +1383,13 @@ namespace CXX // if (impl) { - Context ctx (hxx_impl, schema, ops, - &hxx_expr, &ixx_expr, &hxx_impl_expr); + Context ctx (hxx_impl, + schema, + ops, + &string_literal_map, + &hxx_expr, + &ixx_expr, + &hxx_impl_expr); String guard (guard_expr.merge (guard_prefix + hxx_impl_name)); guard = ctx.escape (guard); // Make it a C++ id. @@ -1380,8 +1417,13 @@ namespace CXX // if (impl) { - Context ctx (cxx_impl, schema, ops, - &hxx_expr, &ixx_expr, &hxx_impl_expr); + Context ctx (cxx_impl, + schema, + ops, + &string_literal_map, + &hxx_expr, + &ixx_expr, + &hxx_impl_expr); // Set auto-indentation. // @@ -1397,8 +1439,13 @@ namespace CXX // if (driver) { - Context ctx (cxx_driver, schema, ops, - &hxx_expr, &ixx_expr, &hxx_impl_expr); + Context ctx (cxx_driver, + schema, + ops, + &string_literal_map, + &hxx_expr, + &ixx_expr, + &hxx_impl_expr); // Set auto-indentation. 
// @@ -1412,6 +1459,17 @@ namespace CXX return sloc; } + catch (UnrepresentableCharacter const& e) + { + wcerr << "error: character at position " << e.position () << " " + << "in string '" << e.string () << "' is unrepresentable in " + << "the target encoding" << endl; + + wcerr << "info: use the --custom-literals option to provide custom " + << "string literals mapping" << endl; + + throw Failed (); + } catch (NoNamespaceMapping const& e) { wcerr << e.file () << ":" << e.line () << ":" << e.column () diff --git a/xsd/cxx/parser/generator.hxx b/xsd/cxx/parser/generator.hxx index aaab3b8..8c5631d 100644 --- a/xsd/cxx/parser/generator.hxx +++ b/xsd/cxx/parser/generator.hxx @@ -18,6 +18,7 @@ #include +#include #include namespace CXX @@ -41,6 +42,7 @@ namespace CXX generate (CLI::Options const& options, XSDFrontend::SemanticGraph::Schema&, XSDFrontend::SemanticGraph::Path const& file, + StringLiteralMap const&, Boolean gen_driver, const WarningSet& disabled_warnings, FileList& file_list, diff --git a/xsd/cxx/parser/name-processor.cxx b/xsd/cxx/parser/name-processor.cxx index e9ba876..5f9209e 100644 --- a/xsd/cxx/parser/name-processor.cxx +++ b/xsd/cxx/parser/name-processor.cxx @@ -3,7 +3,6 @@ // copyright : Copyright (c) 2006-2009 Code Synthesis Tools CC // license : GNU GPL v2 + exceptions; see accompanying LICENSE file -#include #include #include @@ -35,10 +34,13 @@ namespace CXX public: Context (CLI::Options const& ops, SemanticGraph::Schema& root, - SemanticGraph::Path const& file) + SemanticGraph::Path const& file, + StringLiteralMap const* map) : CXX::Context (std::wcerr, root, + map, ops.value (), + ops.value (), ops.value (), ops.value (), ops.value (), @@ -1101,9 +1103,10 @@ namespace CXX Void process_impl (CLI::Options const& ops, SemanticGraph::Schema& tu, - SemanticGraph::Path const& file) + SemanticGraph::Path const& file, + StringLiteralMap const& map) { - Context ctx (ops, tu, file); + Context ctx (ops, tu, file, &map); if (tu.names_begin ()->named 
().name () == L"http://www.w3.org/2001/XMLSchema") @@ -1196,9 +1199,10 @@ namespace CXX Void NameProcessor:: process (CLI::Options const& ops, SemanticGraph::Schema& tu, - SemanticGraph::Path const& file) + SemanticGraph::Path const& file, + StringLiteralMap const& map) { - process_impl (ops, tu, file); + process_impl (ops, tu, file, map); } } } diff --git a/xsd/cxx/parser/name-processor.hxx b/xsd/cxx/parser/name-processor.hxx index f7849c8..fee7027 100644 --- a/xsd/cxx/parser/name-processor.hxx +++ b/xsd/cxx/parser/name-processor.hxx @@ -6,10 +6,9 @@ #ifndef CXX_PARSER_NAME_PROCESSOR_HXX #define CXX_PARSER_NAME_PROCESSOR_HXX -#include - #include +#include #include namespace CXX @@ -26,7 +25,8 @@ namespace CXX Void process (CLI::Options const& ops, XSDFrontend::SemanticGraph::Schema&, - XSDFrontend::SemanticGraph::Path const& file); + XSDFrontend::SemanticGraph::Path const& file, + StringLiteralMap const& map); }; } } diff --git a/xsd/cxx/parser/parser-header.cxx b/xsd/cxx/parser/parser-header.cxx index 878a891..8ecd898 100644 --- a/xsd/cxx/parser/parser-header.cxx +++ b/xsd/cxx/parser/parser-header.cxx @@ -1324,6 +1324,13 @@ namespace CXX } else { + if (ctx.char_type == L"char" && + ctx.xml_parser == L"xerces" && + ctx.char_encoding != L"custom") + { + ctx.os << "#include " << endl; + } + ctx.os << "#include " << endl << "#include " << endl << "#include " << endl diff --git a/xsd/cxx/parser/validator.cxx b/xsd/cxx/parser/validator.cxx index 526c941..9b5d967 100644 --- a/xsd/cxx/parser/validator.cxx +++ b/xsd/cxx/parser/validator.cxx @@ -27,7 +27,7 @@ namespace CXX CLI::Options const& options, const WarningSet& disabled_warnings, Boolean& valid_) - : Context (std::wcerr, root, options, 0, 0, 0), + : Context (std::wcerr, root, options, 0, 0, 0, 0), disabled_warnings_ (disabled_warnings), disabled_warnings_all_ (false), valid (valid_), @@ -584,7 +584,20 @@ namespace CXX if (options.value () == "expat" && options.value () == "wchar_t") { - wcerr << "error: using expat 
with wchar_t is not yet supported" + wcerr << "error: using expat with wchar_t is not supported" + << endl; + + return false; + } + + // + // + if (options.value () == "expat" && + !options.value ().empty () && + options.value () != "utf8") + { + wcerr << "error: using expat with character encoding other than " + << "utf8 is not supported" << endl; return false; diff --git a/xsd/cxx/tree/cli.hxx b/xsd/cxx/tree/cli.hxx index 9ccf405..c9078e7 100644 --- a/xsd/cxx/tree/cli.hxx +++ b/xsd/cxx/tree/cli.hxx @@ -24,6 +24,7 @@ namespace CXX typedef Char const Key[]; extern Key char_type; + extern Key char_encoding; extern Key output_dir; extern Key generate_polymorphic; extern Key generate_serialization; @@ -119,6 +120,7 @@ namespace CXX typedef Cult::CLI::Options< char_type, NarrowString, + char_encoding, NarrowString, output_dir, NarrowString, generate_polymorphic, Boolean, generate_serialization, Boolean, diff --git a/xsd/cxx/tree/counter.cxx b/xsd/cxx/tree/counter.cxx index d8223bb..a9649b5 100644 --- a/xsd/cxx/tree/counter.cxx +++ b/xsd/cxx/tree/counter.cxx @@ -239,7 +239,7 @@ namespace CXX count (CLI::Options const& options, SemanticGraph::Schema& tu) { Counts counts; - Context ctx (std::wcerr, tu, options, counts, false, 0, 0, 0); + Context ctx (std::wcerr, tu, options, counts, false, 0, 0, 0, 0); Traversal::Schema schema; Traversal::Sources sources; diff --git a/xsd/cxx/tree/elements.cxx b/xsd/cxx/tree/elements.cxx index db1d858..444caa4 100644 --- a/xsd/cxx/tree/elements.cxx +++ b/xsd/cxx/tree/elements.cxx @@ -39,12 +39,15 @@ namespace CXX CLI::Options const& ops, Counts const& counts_, Boolean generate_xml_schema__, + StringLiteralMap const* map, Regex const* fe, Regex const* he, Regex const* ie) : CXX::Context (o, root, + map, ops.value (), + ops.value (), ops.value (), ops.value (), ops.value (), diff --git a/xsd/cxx/tree/elements.hxx b/xsd/cxx/tree/elements.hxx index 602291d..a0cb1d9 100644 --- a/xsd/cxx/tree/elements.hxx +++ b/xsd/cxx/tree/elements.hxx @@ 
-117,6 +117,7 @@ namespace CXX CLI::Options const& ops, Counts const& counts_, Boolean generate_xml_schema, + StringLiteralMap const*, Regex const* fwd_expr, Regex const* hxx_expr, Regex const* ixx_expr); diff --git a/xsd/cxx/tree/generator.cxx b/xsd/cxx/tree/generator.cxx index f9b055e..b81504c 100644 --- a/xsd/cxx/tree/generator.cxx +++ b/xsd/cxx/tree/generator.cxx @@ -116,6 +116,7 @@ namespace CXX namespace CLI { extern Key char_type = "char-type"; + extern Key char_encoding = "char-encoding"; extern Key output_dir = "output-dir"; extern Key generate_polymorphic = "generate-polymorphic"; extern Key generate_serialization = "generate-serialization"; @@ -220,12 +221,19 @@ namespace CXX << " values are 'char' (default) and 'wchar_t'." << endl; + e << "--char-encoding " << endl + << " Specify the character encoding that should be used\n" + << " in the object model. Valid values for the 'char'\n" + << " character type are 'utf8' (default), 'iso8859-1',\n" + << " 'lcp', and 'custom'. For the 'wchar_t' character\n" + << " type the only valid value is 'auto'." + << endl; + e << "--output-dir " << endl << " Write generated files to instead of current\n" << " directory." << endl; - e << "--generate-polymorphic" << endl << " Generate polymorphism-aware code. Specify this\n" << " option if you use substitution groups or xsi:type." @@ -670,6 +678,11 @@ namespace CXX << " separate the file name from the part number." << endl; + e << "--custom-literals " << endl + << " Load custom XML string to C++ literal mappings\n" + << " from ." + << endl; + e << "--export-symbol " << endl << " Export symbol for Win32 DLL export/import control." 
<< endl; @@ -803,6 +816,7 @@ namespace CXX generate (Tree::CLI::Options const& ops, Schema& schema, Path const& file_path, + StringLiteralMap const& string_literal_map, const WarningSet& disabled_warnings, FileList& file_list, AutoUnlinks& unlinks) @@ -860,7 +874,7 @@ namespace CXX // { NameProcessor proc; - if (!proc.process (ops, schema, file_path)) + if (!proc.process (ops, schema, file_path, string_literal_map)) throw Failed (); } @@ -1179,8 +1193,15 @@ namespace CXX // if (forward) { - Context ctx (fwd, schema, ops, counts, generate_xml_schema, - &fwd_expr, &hxx_expr, &ixx_expr); + Context ctx (fwd, + schema, + ops, + counts, + generate_xml_schema, + &string_literal_map, + &fwd_expr, + &hxx_expr, + &ixx_expr); Indentation::Clip fwd_sloc (fwd); @@ -1287,8 +1308,15 @@ namespace CXX // HXX // { - Context ctx (hxx, schema, ops, counts, generate_xml_schema, - &fwd_expr, &hxx_expr, &ixx_expr); + Context ctx (hxx, + schema, + ops, + counts, + generate_xml_schema, + &string_literal_map, + &fwd_expr, + &hxx_expr, + &ixx_expr); Indentation::Clip hxx_sloc (hxx); @@ -1434,8 +1462,15 @@ namespace CXX // if (inline_) { - Context ctx (ixx, schema, ops, counts, generate_xml_schema, - &fwd_expr, &hxx_expr, &ixx_expr); + Context ctx (ixx, + schema, + ops, + counts, + generate_xml_schema, + &string_literal_map, + &fwd_expr, + &hxx_expr, + &ixx_expr); Indentation::Clip ixx_sloc (ixx); @@ -1560,8 +1595,15 @@ namespace CXX WideOutputFileStream& os (*cxx[part]); - Context ctx (os, schema, ops, counts, generate_xml_schema, - &fwd_expr, &hxx_expr, &ixx_expr); + Context ctx (os, + schema, + ops, + counts, + generate_xml_schema, + &string_literal_map, + &fwd_expr, + &hxx_expr, + &ixx_expr); Indentation::Clip cxx_sloc (os); @@ -1644,6 +1686,17 @@ namespace CXX return sloc; } + catch (UnrepresentableCharacter const& e) + { + wcerr << "error: character at position " << e.position () << " " + << "in string '" << e.string () << "' is unrepresentable in " + << "the target encoding" << endl; + 
+ wcerr << "info: use the --custom-literals option to provide custom " + << "string literals mapping" << endl; + + throw Failed (); + } catch (NoNamespaceMapping const& e) { wcerr << e.file () << ":" << e.line () << ":" << e.column () diff --git a/xsd/cxx/tree/generator.hxx b/xsd/cxx/tree/generator.hxx index 1aa3c60..a66ede0 100644 --- a/xsd/cxx/tree/generator.hxx +++ b/xsd/cxx/tree/generator.hxx @@ -13,6 +13,7 @@ #include +#include #include namespace CXX @@ -36,6 +37,7 @@ namespace CXX generate (CLI::Options const& options, XSDFrontend::SemanticGraph::Schema&, XSDFrontend::SemanticGraph::Path const& file, + StringLiteralMap const&, const WarningSet& disabled_warnings, FileList& file_list, AutoUnlinks& unlinks); diff --git a/xsd/cxx/tree/name-processor.cxx b/xsd/cxx/tree/name-processor.cxx index 53027af..e15b072 100644 --- a/xsd/cxx/tree/name-processor.cxx +++ b/xsd/cxx/tree/name-processor.cxx @@ -4,7 +4,6 @@ // license : GNU GPL v2 + exceptions; see accompanying LICENSE file #include -#include #include @@ -43,12 +42,14 @@ namespace CXX Counts const& counts, Boolean generate_xml_schema, SemanticGraph::Schema& root, - SemanticGraph::Path const& file) + SemanticGraph::Path const& file, + StringLiteralMap const& map) : Tree::Context (std::wcerr, root, options, counts, generate_xml_schema, + &map, 0, 0, 0), @@ -1970,12 +1971,13 @@ namespace CXX Boolean process_impl (CLI::Options const& ops, SemanticGraph::Schema& tu, - SemanticGraph::Path const& file) + SemanticGraph::Path const& file, + StringLiteralMap const& map) { try { Counts counts; - Context ctx (ops, counts, false, tu, file); + Context ctx (ops, counts, false, tu, file, map); if (tu.names_begin ()->named ().name () == L"http://www.w3.org/2001/XMLSchema") @@ -2096,9 +2098,10 @@ namespace CXX Boolean NameProcessor:: process (CLI::Options const& ops, SemanticGraph::Schema& tu, - SemanticGraph::Path const& file) + SemanticGraph::Path const& file, + StringLiteralMap const& map) { - return process_impl (ops, tu, 
file); + return process_impl (ops, tu, file, map); } } } diff --git a/xsd/cxx/tree/name-processor.hxx b/xsd/cxx/tree/name-processor.hxx index 9b8eac9..18c3b82 100644 --- a/xsd/cxx/tree/name-processor.hxx +++ b/xsd/cxx/tree/name-processor.hxx @@ -6,11 +6,7 @@ #ifndef CXX_TREE_NAME_PROCESSOR_HXX #define CXX_TREE_NAME_PROCESSOR_HXX -#include - -#include - -#include +#include namespace CXX { @@ -26,7 +22,8 @@ namespace CXX Boolean process (CLI::Options const&, XSDFrontend::SemanticGraph::Schema&, - XSDFrontend::SemanticGraph::Path const& file); + XSDFrontend::SemanticGraph::Path const& file, + StringLiteralMap const&); }; } } diff --git a/xsd/cxx/tree/tree-forward.cxx b/xsd/cxx/tree/tree-forward.cxx index cceedb7..02c4317 100644 --- a/xsd/cxx/tree/tree-forward.cxx +++ b/xsd/cxx/tree/tree-forward.cxx @@ -152,6 +152,13 @@ namespace CXX } else { + if (ctx.char_type == L"char" && ctx.char_encoding != L"custom") + { + ctx.os << "#include " << endl + << endl; + } + ctx.os << "#include " << endl << "#include " << endl << "#include " << endl diff --git a/xsd/cxx/tree/tree-header.cxx b/xsd/cxx/tree/tree-header.cxx index 7bb630c..9b39739 100644 --- a/xsd/cxx/tree/tree-header.cxx +++ b/xsd/cxx/tree/tree-header.cxx @@ -3539,6 +3539,13 @@ namespace CXX { if (ctx.generate_xml_schema) { + if (ctx.char_type == L"char" && ctx.char_encoding != L"custom") + { + ctx.os << "#include " << endl + << endl; + } + ctx.os << "#include " << endl << "#include " << endl << "#include " << endl @@ -3671,6 +3678,13 @@ namespace CXX << "#include // std::binary_search" << endl << endl; + if (ctx.char_type == L"char" && ctx.char_encoding != L"custom") + { + ctx.os << "#include " << endl + << endl; + } + ctx.os << "#include " << endl << "#include " << endl << "#include " << endl diff --git a/xsd/cxx/tree/validator.cxx b/xsd/cxx/tree/validator.cxx index 7ef23fa..5742e7a 100644 --- a/xsd/cxx/tree/validator.cxx +++ b/xsd/cxx/tree/validator.cxx @@ -38,6 +38,7 @@ namespace CXX generate_xml_schema, 0, 0, + 0, 
0), disabled_warnings_ (disabled_warnings), disabled_warnings_all_ (false), diff --git a/xsd/elements.hxx b/xsd/elements.hxx index bfde527..3948479 100644 --- a/xsd/elements.hxx +++ b/xsd/elements.hxx @@ -132,4 +132,3 @@ private: }; #endif // ELEMENTS_HXX - diff --git a/xsd/makefile b/xsd/makefile index e58b9dd..12990a6 100644 --- a/xsd/makefile +++ b/xsd/makefile @@ -7,7 +7,8 @@ include $(dir $(lastword $(MAKEFILE_LIST)))../build/bootstrap.make cxx_tun := xsd.cxx -cxx_tun += cxx/elements.cxx +cxx_tun += cxx/elements.cxx \ + cxx/literal-map.cxx cxx_tun += cxx/parser/elements.cxx \ cxx/parser/validator.cxx \ @@ -88,17 +89,22 @@ $(call import,\ $(scf_root)/import/libxsd-frontend/stub.make,\ l: xsd_fe.l,cpp-options: xsd_fe.l.cpp-options) +$(call import,\ + $(scf_root)/import/libxerces-c/stub.make,\ + l: xerces_c.l,cpp-options: xerces_c.l.cpp-options) + # Build. # $(xsd): $(cxx_obj) $(xsd_fe.l) $(be.l) $(cult.l) $(fs.l) $(re.l) $(xerces_c.l) $(cxx_obj) $(cxx_od): cpp_options := -I$(src_base) -$(cxx_obj) $(cxx_od): \ - $(xsd_fe.l.cpp-options) \ - $(be.l.cpp-options) \ - $(cult.l.cpp-options) \ - $(fs.l.cpp-options) \ - $(re.l.cpp-options) +$(cxx_obj) $(cxx_od): \ + $(xsd_fe.l.cpp-options) \ + $(be.l.cpp-options) \ + $(cult.l.cpp-options) \ + $(fs.l.cpp-options) \ + $(re.l.cpp-options) \ + $(xerces_c.l.cpp-options) $(call include-dep,$(cxx_od)) diff --git a/xsd/xsd.cxx b/xsd/xsd.cxx index 7aa18e6..2a67ae9 100644 --- a/xsd/xsd.cxx +++ b/xsd/xsd.cxx @@ -34,6 +34,8 @@ #include #include +#include + #include #include @@ -79,6 +81,7 @@ namespace CLI extern Key location_map = "location-map"; extern Key location_regex = "location-regex"; extern Key location_regex_trace = "location-regex-trace"; + extern Key custom_literals = "custom-literals"; extern Key file_per_type = "file-per-type"; extern Key type_file_regex = "type-file-regex"; extern Key type_file_regex_trace = "type-file-regex-trace"; @@ -101,6 +104,7 @@ namespace CLI location_map, NarrowStrings, location_regex, 
NarrowStrings, location_regex_trace, Boolean, + custom_literals, NarrowString, file_per_type, Boolean, type_file_regex, NarrowStrings, type_file_regex_trace, Boolean, @@ -188,11 +192,27 @@ private: Boolean trace_; }; +// +// +struct XercesInitializer +{ + XercesInitializer () + { + xercesc::XMLPlatformUtils::Initialize (); + } + + ~XercesInitializer () + { + xercesc::XMLPlatformUtils::Terminate (); + } +}; + // Expand the \n escape sequence. // Void expand_nl (NarrowString& s); + Int main (Int argc, Char* argv[]) { @@ -557,6 +577,22 @@ main (Int argc, Char* argv[]) common_ops.value (), common_ops.value ()); + // Load custom string literals, if any. + // + CXX::StringLiteralMap string_literal_map; + + if (NarrowString file = common_ops.value ()) + { + XercesInitializer xerces_init; + + if (!CXX::read_literal_map (file, string_literal_map)) + { + // Diagnostics has already been issued. + // + return 1; + } + } + if (!fpt) { // File-per-schema compilation mode. @@ -703,7 +739,13 @@ main (Int argc, Char* argv[]) try { sloc += CXX::Tree::Generator::generate ( - *tree_ops, *schema, tu, disabled_w, file_list, unlinks); + *tree_ops, + *schema, + tu, + string_literal_map, + disabled_w, + file_list, + unlinks); } catch (CXX::Tree::Generator::Failed const&) { @@ -717,7 +759,14 @@ main (Int argc, Char* argv[]) try { sloc += CXX::Parser::Generator::generate ( - *parser_ops, *schema, tu, true, disabled_w, file_list, unlinks); + *parser_ops, + *schema, + tu, + string_literal_map, + true, + disabled_w, + file_list, + unlinks); } catch (CXX::Parser::Generator::Failed const&) { @@ -837,7 +886,13 @@ main (Int argc, Char* argv[]) try { sloc += CXX::Tree::Generator::generate ( - *tree_ops, s, path, disabled_w, file_list, unlinks); + *tree_ops, + s, + path, + string_literal_map, + disabled_w, + file_list, + unlinks); } catch (CXX::Tree::Generator::Failed const&) { @@ -853,7 +908,14 @@ main (Int argc, Char* argv[]) // Only generate driver for the first schema. 
// sloc += CXX::Parser::Generator::generate ( - *parser_ops, s, path, i == b, disabled_w, file_list, unlinks); + *parser_ops, + s, + path, + string_literal_map, + i == b, + disabled_w, + file_list, + unlinks); } catch (CXX::Parser::Generator::Failed const&) { -- cgit v1.1