From 76d23e639004517db8f9469d64ac1789f8449365 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Thu, 7 Jan 2010 13:50:11 +0200 Subject: Add support for ISO-8859-1 as application encoding New runtime configuration parameter, XSDE_ENCODING. New option, --char-encoding. New test, tests/cxx/hybrid/iso8859-1. --- xsde/cxx/elements.cxx | 130 +++++++++++++++++++++++++- xsde/cxx/elements.hxx | 29 ++++++ xsde/cxx/hybrid/cli.hxx | 2 + xsde/cxx/hybrid/elements.cxx | 1 + xsde/cxx/hybrid/generator.cxx | 38 ++++++++ xsde/cxx/hybrid/parser-name-processor.cxx | 1 + xsde/cxx/hybrid/serializer-name-processor.cxx | 1 + xsde/cxx/hybrid/tree-name-processor.cxx | 1 + xsde/cxx/hybrid/tree-type-map.cxx | 1 + xsde/cxx/hybrid/validator.cxx | 12 +++ xsde/cxx/parser/cli.hxx | 2 + xsde/cxx/parser/elements.cxx | 1 + xsde/cxx/parser/generator.cxx | 36 +++++++ xsde/cxx/parser/name-processor.cxx | 1 + xsde/cxx/parser/parser-header.cxx | 16 +++- xsde/cxx/parser/validator.cxx | 10 ++ xsde/cxx/serializer/cli.hxx | 2 + xsde/cxx/serializer/elements.cxx | 1 + xsde/cxx/serializer/generator.cxx | 36 +++++++ xsde/cxx/serializer/name-processor.cxx | 1 + xsde/cxx/serializer/validator.cxx | 10 ++ 21 files changed, 329 insertions(+), 3 deletions(-) (limited to 'xsde/cxx') diff --git a/xsde/cxx/elements.cxx b/xsde/cxx/elements.cxx index be65fc1..f8d7132 100644 --- a/xsde/cxx/elements.cxx +++ b/xsde/cxx/elements.cxx @@ -114,6 +114,7 @@ namespace CXX SemanticGraph::Schema& root, Char const* name_key, NarrowString const& char_type__, + NarrowString const& char_encoding__, Boolean include_with_brackets__, NarrowString const& include_prefix__, NarrowString const& esymbol, @@ -128,6 +129,7 @@ namespace CXX schema_root (root), ename_key (ename_key_), char_type (char_type_), + char_encoding (char_encoding_), L (L_), string_type (string_type_), include_with_brackets (include_with_brackets_), @@ -139,6 +141,7 @@ namespace CXX xs_ns_ (0), ename_key_ (name_key), char_type_ (char_type__), + char_encoding_ (char_encoding__), L_ (char_type == L"wchar_t" ? L"L" : L""), include_with_brackets_ (include_with_brackets__), include_prefix_ (include_prefix__), @@ -819,6 +822,126 @@ namespace CXX } String + strlit_iso8859_1 (String const& str) + { + String r; + Size n (str.size ()); + + // In most common cases we will have that many chars. + // + r.reserve (n + 2); + + r += '"'; + + Boolean escape (false); + + for (Size i (0); i < n; ++i) + { + UnsignedLong u (Context::unicode_char (str, i)); // May advance i. + + // [256 - ] - unrepresentable + // [127 - 255] - \xXX + // [32 - 126] - as is + // [0 - 31] - \X or \xXX + // + + if (u < 32) + { + switch (u) + { + case L'\n': + { + r += L"\\n"; + break; + } + case L'\t': + { + r += L"\\t"; + break; + } + case L'\v': + { + r += L"\\v"; + break; + } + case L'\b': + { + r += L"\\b"; + break; + } + case L'\r': + { + r += L"\\r"; + break; + } + case L'\f': + { + r += L"\\f"; + break; + } + case L'\a': + { + r += L"\\a"; + break; + } + default: + { + r += charlit (u); + escape = true; + break; + } + } + } + else if (u < 127) + { + if (escape) + { + // Close and open the string so there are no clashes. + // + r += '"'; + r += '"'; + + escape = false; + } + + switch (u) + { + case L'"': + { + r += L"\\\""; + break; + } + case L'\\': + { + r += L"\\\\"; + break; + } + default: + { + r += static_cast (u); + break; + } + } + } + else if (u < 256) + { + r += charlit (u); + escape = true; + } + else + { + // Unrepresentable character. + // + throw UnrepresentableCharacter (str, i + 1); + } + } + + r += '"'; + + return r; + } + + String strlit_utf32 (String const& str) { String r; @@ -936,7 +1059,12 @@ namespace CXX strlit (String const& str) { if (char_type == L"char") - return strlit_utf8 (str); + { + if (char_encoding == L"iso8859-1") + return strlit_iso8859_1 (str); + else + return strlit_utf8 (str); + } else return strlit_utf32 (str); } diff --git a/xsde/cxx/elements.hxx b/xsde/cxx/elements.hxx index d5c5a3c..67b51db 100644 --- a/xsde/cxx/elements.hxx +++ b/xsde/cxx/elements.hxx @@ -35,6 +35,30 @@ namespace CXX // Exceptions. // + struct UnrepresentableCharacter + { + UnrepresentableCharacter (String const& str, Size pos) + : str_ (str), pos_ (pos) + { + } + + String const& + string () const + { + return str_; + } + + Size + position () const + { + return pos_; + } + + private: + String str_; + Size pos_; + }; + struct NoNamespaceMapping { NoNamespaceMapping (SemanticGraph::Path const& file, @@ -124,6 +148,7 @@ namespace CXX SemanticGraph::Schema& root, Char const* name_key, NarrowString const& char_type__, + NarrowString const& char_encoding__, Boolean include_with_brackets__, NarrowString const& include_prefix__, NarrowString const& esymbol, @@ -141,6 +166,7 @@ namespace CXX schema_root (c.schema_root), ename_key (c.ename_key), char_type (c.char_type), + char_encoding (c.char_encoding), L (c.L), string_type (c.string_type), include_with_brackets (c.include_with_brackets), @@ -167,6 +193,7 @@ namespace CXX schema_root (c.schema_root), ename_key (c.ename_key), char_type (c.char_type), + char_encoding (c.char_encoding), L (c.L), string_type (c.string_type), include_with_brackets (c.include_with_brackets), @@ -296,6 +323,7 @@ namespace CXX NarrowString const& ename_key; String& char_type; + String& char_encoding; String& L; // string literal prefix String& string_type; @@ -315,6 +343,7 @@ namespace CXX NarrowString const ename_key_; String char_type_; + String char_encoding_; String L_; String string_type_; diff --git a/xsde/cxx/hybrid/cli.hxx b/xsde/cxx/hybrid/cli.hxx index ff89942..3834767 100644 --- a/xsde/cxx/hybrid/cli.hxx +++ b/xsde/cxx/hybrid/cli.hxx @@ -23,6 +23,7 @@ namespace CXX typedef Char const Key[]; + extern Key char_encoding; extern Key no_stl; extern Key no_iostream; extern Key no_exceptions; @@ -112,6 +113,7 @@ namespace CXX extern Key proprietary_license; typedef Cult::CLI::Options< + char_encoding, NarrowString, no_stl, Boolean, no_iostream, Boolean, no_exceptions, Boolean, diff --git a/xsde/cxx/hybrid/elements.cxx b/xsde/cxx/hybrid/elements.cxx index e08cf8b..27e983a 100644 --- a/xsde/cxx/hybrid/elements.cxx +++ b/xsde/cxx/hybrid/elements.cxx @@ -20,6 +20,7 @@ namespace CXX root, "name", "char", + ops.value (), ops.value (), ops.value (), "", // export symbol diff --git a/xsde/cxx/hybrid/generator.cxx b/xsde/cxx/hybrid/generator.cxx index 311d5d7..14cba6a 100644 --- a/xsde/cxx/hybrid/generator.cxx +++ b/xsde/cxx/hybrid/generator.cxx @@ -111,6 +111,7 @@ namespace CXX { namespace CLI { + extern Key char_encoding = "char-encoding"; extern Key no_stl = "no-stl"; extern Key no_iostream = "no-iostream"; extern Key no_exceptions = "no-exceptions"; @@ -207,6 +208,12 @@ namespace CXX std::wostream& e (wcerr); ::CLI::Indent::Clip< ::CLI::OptionsUsage, WideChar> clip (e); + e << "--char-encoding " << endl + << " Specify the character encoding that should be\n" + << " used in the object model. Valid values are 'utf8'\n" + << " (default) and 'iso8859-1'." + << endl; + e << "--no-stl" << endl << " Generate code that does not use STL." << endl; @@ -704,6 +711,8 @@ namespace CXX { CLI::OptionsSpec spec; + spec.option ().default_value ("utf8"); + spec.option ().default_value ("-pskel"); spec.option ().default_value ("-sskel"); spec.option ().default_value ("_pskel"); @@ -814,6 +823,7 @@ namespace CXX Evptr r (new P::Options); + r->value () = h.value (); r->value () = h.value (); r->value () = h.value (); r->value () = h.value (); @@ -895,6 +905,7 @@ namespace CXX Evptr r (new S::Options); + r->value () = h.value (); r->value () = h.value (); r->value () = h.value (); r->value () = h.value (); @@ -1527,6 +1538,25 @@ namespace CXX hxx << "#include " << endl << endl; + if (ops.value () == "iso8859-1") + { + hxx << "#ifndef XSDE_ENCODING_ISO8859_1" << endl + << "#error the generated code uses the ISO-8859-1 encoding" << + "while the XSD/e runtime does not (reconfigure the runtime " << + "or change the --char-encoding value)" << endl + << "#endif" << endl + << endl; + } + else + { + hxx << "#ifndef XSDE_ENCODING_UTF8" << endl + << "#error the generated code uses the UTF-8 encoding" << + "while the XSD/e runtime does not (reconfigure the runtime " << + "or change the --char-encoding value)" << endl + << "#endif" << endl + << endl; + } + if (ops.value ()) { hxx << "#ifdef XSDE_STL" << endl @@ -2613,6 +2643,14 @@ namespace CXX return sloc; } + catch (UnrepresentableCharacter const& e) + { + wcerr << "error: character at position " << e.position () << " " + << "in string '" << e.string () << "' is unrepresentable in " + << "the target encoding" << endl; + + throw Failed (); + } catch (NoNamespaceMapping const& e) { wcerr << e.file () << ":" << e.line () << ":" << e.column () diff --git a/xsde/cxx/hybrid/parser-name-processor.cxx b/xsde/cxx/hybrid/parser-name-processor.cxx index a8d3639..afb0df4 100644 --- a/xsde/cxx/hybrid/parser-name-processor.cxx +++ b/xsde/cxx/hybrid/parser-name-processor.cxx @@ -36,6 +36,7 @@ namespace CXX root, "name", "char", + ops.value (), ops.value (), ops.value (), "", // export symbol diff --git a/xsde/cxx/hybrid/serializer-name-processor.cxx b/xsde/cxx/hybrid/serializer-name-processor.cxx index 34f205a..5da88b9 100644 --- a/xsde/cxx/hybrid/serializer-name-processor.cxx +++ b/xsde/cxx/hybrid/serializer-name-processor.cxx @@ -35,6 +35,7 @@ namespace CXX root, "name", "char", + ops.value (), ops.value (), ops.value (), "", // export symbol diff --git a/xsde/cxx/hybrid/tree-name-processor.cxx b/xsde/cxx/hybrid/tree-name-processor.cxx index 2ee30ef..b2a2c69 100644 --- a/xsde/cxx/hybrid/tree-name-processor.cxx +++ b/xsde/cxx/hybrid/tree-name-processor.cxx @@ -38,6 +38,7 @@ namespace CXX root, "name", "char", + ops.value (), ops.value (), ops.value (), "", // export symbol diff --git a/xsde/cxx/hybrid/tree-type-map.cxx b/xsde/cxx/hybrid/tree-type-map.cxx index 5f1c36b..1b22ff9 100644 --- a/xsde/cxx/hybrid/tree-type-map.cxx +++ b/xsde/cxx/hybrid/tree-type-map.cxx @@ -27,6 +27,7 @@ namespace CXX root, "name", "char", + ops.value (), ops.value (), ops.value (), "", // export symbol diff --git a/xsde/cxx/hybrid/validator.cxx b/xsde/cxx/hybrid/validator.cxx index f95bd96..716f072 100644 --- a/xsde/cxx/hybrid/validator.cxx +++ b/xsde/cxx/hybrid/validator.cxx @@ -489,6 +489,18 @@ namespace CXX Boolean valid (true); ValidationContext ctx (root, options, disabled_warnings, valid); + // + // + NarrowString enc (options.value ()); + + if (enc != "utf8" && enc != "iso8859-1") + { + wcerr << "error: unknown encoding '" << enc.c_str () << "'" << endl; + return false; + } + + // + // Boolean par (options.value ()); Boolean ser (options.value ()); Boolean agg (options.value ()); diff --git a/xsde/cxx/parser/cli.hxx b/xsde/cxx/parser/cli.hxx index 62a1891..a50c3ff 100644 --- a/xsde/cxx/parser/cli.hxx +++ b/xsde/cxx/parser/cli.hxx @@ -24,6 +24,7 @@ namespace CXX typedef Char const Key[]; extern Key type_map; + extern Key char_encoding; extern Key no_stl; extern Key no_iostream; extern Key no_exceptions; @@ -86,6 +87,7 @@ namespace CXX typedef Cult::CLI::Options< type_map, Cult::Containers::Vector, + char_encoding, NarrowString, no_stl, Boolean, no_iostream, Boolean, no_exceptions, Boolean, diff --git a/xsde/cxx/parser/elements.cxx b/xsde/cxx/parser/elements.cxx index 4b624ff..3c62b5b 100644 --- a/xsde/cxx/parser/elements.cxx +++ b/xsde/cxx/parser/elements.cxx @@ -20,6 +20,7 @@ namespace CXX root, "p:name", "char", + ops.value (), ops.value (), ops.value (), "", // export symbol diff --git a/xsde/cxx/parser/generator.cxx b/xsde/cxx/parser/generator.cxx index e59180c..1936286 100644 --- a/xsde/cxx/parser/generator.cxx +++ b/xsde/cxx/parser/generator.cxx @@ -113,6 +113,7 @@ namespace CXX namespace CLI { extern Key type_map = "type-map"; + extern Key char_encoding = "char-encoding"; extern Key no_stl = "no-stl"; extern Key no_iostream = "no-iostream"; extern Key no_exceptions = "no-exceptions"; @@ -188,6 +189,12 @@ namespace CXX << " order of appearance and the first match is used." << endl; + e << "--char-encoding " << endl + << " Specify the character encoding that should be\n" + << " used for the extracted text data. Valid values\n" + << " are 'utf8' (default) and 'iso8859-1'." + << endl; + e << "--no-stl" << endl << " Generate code that does not use STL." << endl; @@ -514,6 +521,8 @@ namespace CXX { CLI::OptionsSpec spec; + spec.option ().default_value ("utf8"); + spec.option ().default_value ("-pskel"); spec.option ().default_value ("_pskel"); spec.option ().default_value ("-pimpl"); @@ -1207,6 +1216,25 @@ namespace CXX hxx << "#include " << endl << endl; + if (ops.value () == "iso8859-1") + { + hxx << "#ifndef XSDE_ENCODING_ISO8859_1" << endl + << "#error the generated code uses the ISO-8859-1 encoding" << + "while the XSD/e runtime does not (reconfigure the runtime " << + "or change the --char-encoding value)" << endl + << "#endif" << endl + << endl; + } + else + { + hxx << "#ifndef XSDE_ENCODING_UTF8" << endl + << "#error the generated code uses the UTF-8 encoding" << + "while the XSD/e runtime does not (reconfigure the runtime " << + "or change the --char-encoding value)" << endl + << "#endif" << endl + << endl; + } + if (ops.value ()) { hxx << "#ifdef XSDE_STL" << endl @@ -1585,6 +1613,14 @@ namespace CXX return sloc; } + catch (UnrepresentableCharacter const& e) + { + wcerr << "error: character at position " << e.position () << " " + << "in string '" << e.string () << "' is unrepresentable in " + << "the target encoding" << endl; + + throw Failed (); + } catch (NoNamespaceMapping const& e) { wcerr << e.file () << ":" << e.line () << ":" << e.column () diff --git a/xsde/cxx/parser/name-processor.cxx b/xsde/cxx/parser/name-processor.cxx index 658b70a..a168c28 100644 --- a/xsde/cxx/parser/name-processor.cxx +++ b/xsde/cxx/parser/name-processor.cxx @@ -34,6 +34,7 @@ namespace CXX root, "p:name", "char", + ops.value (), ops.value (), ops.value (), "", // export symbol diff --git a/xsde/cxx/parser/parser-header.cxx b/xsde/cxx/parser/parser-header.cxx index f3dd45c..6ef9578 100644 --- a/xsde/cxx/parser/parser-header.cxx +++ b/xsde/cxx/parser/parser-header.cxx @@ -1634,6 +1634,14 @@ namespace CXX << "typedef xsde::cxx::parser::context parser_context;" << endl; + if (char_encoding == L"iso8859-1") + { + os << "// ISO-8859-1 transcoder." << endl + << "//" << endl + << "using xsde::cxx::iso8859_1;" + << endl; + } + post (ns); } }; @@ -1675,8 +1683,12 @@ namespace CXX else { ctx.os << "#include " << endl - << "#include " << endl - << endl; + << "#include " << endl; + + if (ctx.char_encoding == L"iso8859-1") + ctx.os << "#include " << endl; + + ctx.os << endl; // Data types. // diff --git a/xsde/cxx/parser/validator.cxx b/xsde/cxx/parser/validator.cxx index 96170de..33fe2e2 100644 --- a/xsde/cxx/parser/validator.cxx +++ b/xsde/cxx/parser/validator.cxx @@ -601,6 +601,16 @@ namespace CXX // // + NarrowString enc (options.value ()); + + if (enc != "utf8" && enc != "iso8859-1") + { + wcerr << "error: unknown encoding '" << enc.c_str () << "'" << endl; + return false; + } + + // + // if (options.value () && options.value ()) { diff --git a/xsde/cxx/serializer/cli.hxx b/xsde/cxx/serializer/cli.hxx index 41bdde2..d65ca1a 100644 --- a/xsde/cxx/serializer/cli.hxx +++ b/xsde/cxx/serializer/cli.hxx @@ -24,6 +24,7 @@ namespace CXX typedef Char const Key[]; extern Key type_map; + extern Key char_encoding; extern Key no_stl; extern Key no_iostream; extern Key no_exceptions; @@ -85,6 +86,7 @@ namespace CXX typedef Cult::CLI::Options< type_map, Cult::Containers::Vector, + char_encoding, NarrowString, no_stl, Boolean, no_iostream, Boolean, no_exceptions, Boolean, diff --git a/xsde/cxx/serializer/elements.cxx b/xsde/cxx/serializer/elements.cxx index 495b867..f59a102 100644 --- a/xsde/cxx/serializer/elements.cxx +++ b/xsde/cxx/serializer/elements.cxx @@ -20,6 +20,7 @@ namespace CXX root, "s:name", "char", + ops.value (), ops.value (), ops.value (), "", // export symbol diff --git a/xsde/cxx/serializer/generator.cxx b/xsde/cxx/serializer/generator.cxx index c15d603..cf6091d 100644 --- a/xsde/cxx/serializer/generator.cxx +++ b/xsde/cxx/serializer/generator.cxx @@ -111,6 +111,7 @@ namespace CXX namespace CLI { extern Key type_map = "type-map"; + extern Key char_encoding = "char-encoding"; extern Key no_stl = "no-stl"; extern Key no_iostream = "no-iostream"; extern Key no_exceptions = "no-exceptions"; @@ -185,6 +186,12 @@ namespace CXX << " order of appearance and the first match is used." << endl; + e << "--char-encoding " << endl + << " Specify the character encoding that is used in\n" + << " the text data being serialized. Valid values are\n" + << " 'utf8' (default) and 'iso8859-1'." + << endl; + e << "--no-stl" << endl << " Generate code that does not use STL." << endl; @@ -507,6 +514,8 @@ namespace CXX { CLI::OptionsSpec spec; + spec.option ().default_value ("utf8"); + spec.option ().default_value ("-sskel"); spec.option ().default_value ("_sskel"); spec.option ().default_value ("-simpl"); @@ -1193,6 +1202,25 @@ namespace CXX hxx << "#include " << endl << endl; + if (ops.value () == "iso8859-1") + { + hxx << "#ifndef XSDE_ENCODING_ISO8859_1" << endl + << "#error the generated code uses the ISO-8859-1 encoding" << + "while the XSD/e runtime does not (reconfigure the runtime " << + "or change the --char-encoding value)" << endl + << "#endif" << endl + << endl; + } + else + { + hxx << "#ifndef XSDE_ENCODING_UTF8" << endl + << "#error the generated code uses the UTF-8 encoding" << + "while the XSD/e runtime does not (reconfigure the runtime " << + "or change the --char-encoding value)" << endl + << "#endif" << endl + << endl; + } + if (ops.value ()) { hxx << "#ifdef XSDE_STL" << endl @@ -1570,6 +1598,14 @@ namespace CXX return sloc; } + catch (UnrepresentableCharacter const& e) + { + wcerr << "error: character at position " << e.position () << " " + << "in string '" << e.string () << "' is unrepresentable in " + << "the target encoding" << endl; + + throw Failed (); + } catch (NoNamespaceMapping const& e) { wcerr << e.file () << ":" << e.line () << ":" << e.column () diff --git a/xsde/cxx/serializer/name-processor.cxx b/xsde/cxx/serializer/name-processor.cxx index e388169..ddc9c4e 100644 --- a/xsde/cxx/serializer/name-processor.cxx +++ b/xsde/cxx/serializer/name-processor.cxx @@ -34,6 +34,7 @@ namespace CXX root, "s:name", "char", + ops.value (), ops.value (), ops.value (), "", // export symbol diff --git a/xsde/cxx/serializer/validator.cxx b/xsde/cxx/serializer/validator.cxx index 50948b9..c163459 100644 --- a/xsde/cxx/serializer/validator.cxx +++ b/xsde/cxx/serializer/validator.cxx @@ -410,6 +410,16 @@ namespace CXX // // + NarrowString enc (options.value ()); + + if (enc != "utf8" && enc != "iso8859-1") + { + wcerr << "error: unknown encoding '" << enc.c_str () << "'" << endl; + return false; + } + + // + // { Boolean ref (options.value ()); Boolean rel (options.value ()); -- cgit v1.1