From 5e527213a2430bb3018e5eebd909aef294edf9b5 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Fri, 18 Dec 2020 18:48:46 +0300 Subject: Switch to build2 --- xsd/xsd/cxx/elements.cxx | 1322 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1322 insertions(+) create mode 100644 xsd/xsd/cxx/elements.cxx (limited to 'xsd/xsd/cxx/elements.cxx') diff --git a/xsd/xsd/cxx/elements.cxx b/xsd/xsd/cxx/elements.cxx new file mode 100644 index 0000000..e914f9d --- /dev/null +++ b/xsd/xsd/cxx/elements.cxx @@ -0,0 +1,1322 @@ +// file : xsd/cxx/elements.cxx +// license : GNU GPL v2 + exceptions; see accompanying LICENSE file + +#include + +#include // std::toupper +#include +#include +#include +#include +#include + +using std::wcerr; +using std::endl; + +namespace CXX +{ + // + // + wchar_t + upcase (wchar_t c) + { + return std::toupper (c); + } + + namespace + { + wchar_t const* keywords[] = { + L"NULL", + L"and", + L"asm", + L"auto", + L"bitand", + L"bitor", + L"bool", + L"break", + L"case", + L"catch", + L"char", + L"class", + L"compl", + L"const", + L"const_cast", + L"continue", + L"default", + L"delete", + L"do", + L"double", + L"dynamic_cast", + L"else", + L"end_eq", + L"enum", + L"explicit", + L"export", + L"extern", + L"false", + L"float", + L"for", + L"friend", + L"goto", + L"if", + L"inline", + L"int", + L"long", + L"mutable", + L"namespace", + L"new", + L"not", + L"not_eq", + L"operator", + L"or", + L"or_eq", + L"private", + L"protected", + L"public", + L"register", + L"reinterpret_cast", + L"return", + L"short", + L"signed", + L"sizeof", + L"static", + L"static_cast", + L"struct", + L"switch", + L"template", + L"this", + L"throw", + L"true", + L"try", + L"typedef", + L"typeid", + L"typename", + L"union", + L"unsigned", + L"using", + L"virtual", + L"void", + L"volatile", + L"wchar_t", + L"while", + L"xor", + L"xor_eq" + }; + } + + // Context + // + + Context:: + Context (std::wostream& o, + SemanticGraph::Schema& root, + SemanticGraph::Path const& path, + options_type const& ops, + StringLiteralMap const* string_literal_map_) + : os (o), + schema_root (root), + schema_path (schema_path_), + options (ops), + std (ops.std ()), + char_type (char_type_), + char_encoding (char_encoding_), + L (L_), + string_type (string_type_), + auto_ptr (auto_ptr_), + string_literal_map (string_literal_map_), + type_exp (type_exp_), + inst_exp (inst_exp_), + inl (inl_), + ns_mapping_cache (ns_mapping_cache_), + schema_path_ (path), + xs_ns_ (0), + char_type_ (ops.char_type ()), + char_encoding_ (ops.char_encoding ()), + L_ (char_type == L"wchar_t" ? L"L" : L""), + inl_ (ops.generate_inline () ? L"inline\n" : L""), + cxx_id_expr_ (L"^(::)?([a-zA-Z_]\\w*)(::[a-zA-Z_]\\w*)*$"), + cxx_id_expr (cxx_id_expr_), + urn_mapping_ (L"#^urn.*:([a-zA-Z_].*)$#$1#"), + urn_mapping (urn_mapping_), + nsr_mapping (nsr_mapping_), + nsm_mapping (nsm_mapping_), + include_mapping (include_mapping_), + reserved_name_map (reserved_name_map_), + keyword_set (keyword_set_) + { + // Export symbol. + // + { + String es (ops.export_symbol ()); + type_exp_ = es ? es + L" " : es; + inst_exp_ = es ? es + L"\n" : es; + } + + // Resolve and cache XML Schema namespace. + // + { + SemanticGraph::Nameable* n; + + if (schema_root.names_begin ()->name () == + L"http://www.w3.org/2001/XMLSchema") + { + // schema_root is the XML Schema itself. + // + n = &schema_root.names_begin ()->named (); + } + else + { + // Otherwise, the first used schema is implied XML Schema. + // + SemanticGraph::Uses& u = *schema_root.uses_begin (); + assert (u.is_a ()); + n = &u.schema ().names_begin ()->named (); + } + + xs_ns_ = dynamic_cast (n); + } + + // String type. + // + if (char_type == L"char") + string_type_ = L"::std::string"; + else if (char_type == L"wchar_t") + string_type_ = L"::std::wstring"; + else + string_type_ = L"::std::basic_string< " + char_type + L" >"; + + // Automatic pointer type. + // + auto_ptr_ = std >= cxx_version::cxx11 + ? "::std::unique_ptr" + : "::std::auto_ptr"; + + // Default encoding. + // + if (!char_encoding) + { + if (char_type == L"char") + char_encoding = L"utf8"; + else + char_encoding = L"auto"; + } + + // Default mapping. + // + nsr_mapping_.push_back ( + Regex (L"#^.* (.*?/)??" L"(([a-zA-Z_]\\w*)(/[a-zA-Z_]\\w*)*)/?$#$2#")); + nsr_mapping_.push_back ( + Regex (L"#^.* http://www\\.w3\\.org/2001/XMLSchema$#xml_schema#")); + + // Custom regex mapping. + // + for (NarrowStrings::const_iterator i (ops.namespace_regex ().begin ()), + e (ops.namespace_regex ().end ()); i != e; ++i) + { + nsr_mapping_.push_back (Regex (String (*i))); + } + + // Custom direct mapping. + // + for (NarrowStrings::const_iterator i (ops.namespace_map ().begin ()), + e (ops.namespace_map ().end ()); i != e; ++i) + { + String s (*i); + + // Split the string in two parts at the last '='. + // + size_t pos (s.rfind ('=')); + + if (pos == String::npos) + throw InvalidNamespaceMapping (s, "delimiter ('=') not found"); + + // Empty xml_ns designates the no-namespace case. + // + String xml_ns (s, 0, pos); + String cxx_ns (s, pos + 1); + + if (!cxx_ns.empty () && !cxx_id_expr.match (cxx_ns)) + throw InvalidNamespaceMapping (s, "invalid C++ identifier"); + + nsm_mapping_[xml_ns] = cxx_ns; + } + + // Include path regex + // + for (NarrowStrings::const_iterator i (ops.include_regex ().begin ()), + e (ops.include_regex ().end ()); i != e; ++i) + { + include_mapping_.push_back (Regex (String (*i))); + } + + // Reserved names. + // + for (NarrowStrings::const_iterator i (ops.reserved_name ().begin ()), + e (ops.reserved_name ().end ()); i != e; ++i) + { + String s (*i); + + // Split the string in two parts at '='. + // + size_t pos (s.find ('=')); + + if (pos == String::npos) + reserved_name_map_[s] = L""; + else + reserved_name_map_[String (s, 0, pos)] = String (s, pos + 1); + } + + // Populate the keyword set. + // + for (size_t i (0); i < sizeof (keywords) / sizeof (char*); ++i) + keyword_set_.insert (keywords[i]); + } + + String Context:: + ns_name (SemanticGraph::Namespace& ns) + { + using SemanticGraph::Schema; + using SemanticGraph::Includes; + using SemanticGraph::Imports; + using SemanticGraph::Implies; + using SemanticGraph::Sources; + + String tmp; + MapMapping::const_iterator i (nsm_mapping.find (ns.name ())); + + if (i != nsm_mapping.end ()) + { + tmp = i->second; + } + else + { + SemanticGraph::Path path; + Schema& schema (dynamic_cast (ns.scope ())); + + if (schema.used_p ()) + { + // Here we need to detect a special multi-schema compilation + // case where the root schemas are imported into a special + // schema that doesn't have a namespace. + // + SemanticGraph::Uses& u (*schema.used_begin ()); + SemanticGraph::Schema& s (u.user ()); + + if (s.names_begin () != s.names_end ()) + path = u.path (); + } + else + path = schema_path; + + String pair; + + if (!path.empty ()) + { + path.normalize (); + + // Try to use the portable representation of the path. If that + // fails, fall back to the native representation. + // + try + { + pair = path.posix_string (); + } + catch (SemanticGraph::InvalidPath const&) + { + pair = path.string (); + } + } + + pair += L' ' + ns.name (); + + // Check cache first + // + MappingCache::const_iterator i (ns_mapping_cache.find (pair)); + + if (i != ns_mapping_cache.end ()) + { + tmp = i->second; + } + else + { + bool trace (options.namespace_regex_trace ()); + + if (trace) + wcerr << "namespace: '" << pair << "'" << endl; + + bool found (false); + Regex colon (L"#/#::#"); + + for (RegexMapping::const_reverse_iterator e (nsr_mapping.rbegin ()); + e != nsr_mapping.rend (); ++e) + { + if (trace) + wcerr << "try: '" << e->regex () << "' : "; + + if (e->match (pair)) + { + tmp = e->replace (pair); + tmp = colon.replace (tmp); // replace `/' with `::' + + // Check the result. + // + found = cxx_id_expr.match (tmp); + + if (trace) + wcerr << "'" << tmp << "' : "; + } + + if (trace) + wcerr << (found ? '+' : '-') << endl; + + if (found) + break; + } + + if (!found) + { + String const& n (ns.name ()); + + // Check if the name is valid by itself. + // + if (n.empty ()) + { + // Empty name denotes a no-namespace case. + // + tmp = n; + } + else + { + tmp = colon.replace (n); // replace `/' with `::' + + if (!cxx_id_expr.match (tmp)) + { + // See if this is a urn-style namespace. + // + if (urn_mapping.match (n)) + { + Regex filter (L"#[.:-]#_#"); + tmp = urn_mapping.replace (n); + tmp = filter.replace (tmp); + + if (!cxx_id_expr.match (tmp)) + throw NoNamespaceMapping ( + ns.file (), ns.line (), ns.column (), ns.name ()); + } + else + throw NoNamespaceMapping ( + ns.file (), ns.line (), ns.column (), ns.name ()); + } + } + } + + // Add the mapping to the cache. + // + ns_mapping_cache[pair] = tmp; + } + } + + + // Parse resulting namespace string and id() each name. + // + String r; + String::size_type b (0), e; + + do + { + e = tmp.find (L"::", b); + + String name (tmp, b, e == tmp.npos ? e : e - b); + + if (!name.empty ()) + r += L"::" + escape (name); + + b = e; + + if (b == tmp.npos) + break; + + b += 2; + + } while (true); + + return r; + } + + SemanticGraph::Namespace& Context:: + xs_ns () + { + return *xs_ns_; + } + + String Context:: + xs_ns_name () + { + return ns_name (*xs_ns_); + } + + SemanticGraph::Namespace& Context:: + namespace_ (SemanticGraph::Nameable& n) + { + // The basic idea goes like this: go up Names edges until you + // reach Namespace. There are, however, anonymous types which + // need special handling. In the case of an anonymous type we + // will go up the first Belongs edge (because the first edge + // is where the type was defined. + // + + if (n.named_p ()) + { + SemanticGraph::Scope& s (n.scope ()); + + SemanticGraph::Namespace* ns ( + dynamic_cast (&n)); + + return ns ? *ns : namespace_ (s); + } + else + { + SemanticGraph::Type& t (dynamic_cast (n)); + + SemanticGraph::Belongs& b (*t.classifies_begin ()); + + return namespace_ (b.instance ()); + } + } + + String Context:: + xml_ns_name (SemanticGraph::Nameable& n) + { + return namespace_ (n).name (); + } + + String Context:: + fq_name (SemanticGraph::Nameable& n, char const* name_key) + { + using namespace SemanticGraph; + + String r; + + if (dynamic_cast (&n)) + { + return L""; // Map to global namespace. + } + else if (SemanticGraph::Namespace* ns = + dynamic_cast (&n)) + { + r = ns_name (*ns); + } + else + { + r = fq_name (n.scope ()); + r += L"::"; + r += n.context ().get (name_key); + } + + return r; + } + + SemanticGraph::Type& Context:: + ultimate_base (SemanticGraph::Complex& c) + { + using namespace SemanticGraph; + + Type* b (&c.inherits ().base ()); + + while (true) + { + Complex* cb (dynamic_cast (b)); + + if (cb != 0 && cb->inherits_p ()) + { + b = &cb->inherits ().base (); + continue; + } + + break; + } + + return *b; + } + + String Context:: + escape (String const& name) + { + String r; + size_t n (name.size ()); + + // In most common cases we will have that many chars. + // + r.reserve (n); + + for (size_t i (0); i < n; ++i) + { + bool first (i == 0); + + unsigned int u (unicode_char (name, i)); // May advance i. + + if (first) + { + if (!((u >= 'a' && u <= 'z') || + (u >= 'A' && u <= 'Z') || + u == '_')) + r = (u >= '0' && u <= '9') ? L"cxx_" : L"cxx"; + } + + if (!((u >= 'a' && u <= 'z') || + (u >= 'A' && u <= 'Z') || + (u >= '0' && u <= '9') || + u == '_')) + r.push_back ('_'); + else + r.push_back (static_cast (u)); + } + + if (r.empty ()) + r = L"cxx"; + + // Custom reserved words. + // + ReservedNameMap::const_iterator i (reserved_name_map.find (r)); + + if (i != reserved_name_map.end ()) + { + if (i->second) + return i->second; + else + r += L'_'; + } + + // Keywords + // + if (keyword_set.find (r) != keyword_set.end ()) + { + r += L'_'; + + // Re-run custom words. + // + i = reserved_name_map.find (r); + + if (i != reserved_name_map.end ()) + { + if (i->second) + return i->second; + else + r += L'_'; + } + } + + return r; + } + + // String escaping. + // + + String + charlit (unsigned int u) + { + String r ("\\x"); + bool lead (true); + + for (int i (7); i >= 0; --i) + { + unsigned int x ((u >> (i * 4)) & 0x0F); + + if (lead) + { + if (x == 0) + continue; + + lead = false; + } + + r += x < 10 ? ('0' + x) : ('A' + x - 10); + } + + return r; + } + + String + strlit_ascii (String const& str) + { + String r; + size_t n (str.size ()); + + // In most common cases we will have that many chars. + // + r.reserve (n + 2); + + r += '"'; + + bool escape (false); + + for (size_t i (0); i < n; ++i) + { + unsigned int u (Context::unicode_char (str, i)); // May advance i. + + // [128 - ] - unrepresentable + // 127 - \x7F + // [32 - 126] - as is + // [0 - 31] - \X or \xXX + // + + if (u < 32 || u == 127) + { + switch (u) + { + case L'\n': + { + r += L"\\n"; + break; + } + case L'\t': + { + r += L"\\t"; + break; + } + case L'\v': + { + r += L"\\v"; + break; + } + case L'\b': + { + r += L"\\b"; + break; + } + case L'\r': + { + r += L"\\r"; + break; + } + case L'\f': + { + r += L"\\f"; + break; + } + case L'\a': + { + r += L"\\a"; + break; + } + default: + { + r += charlit (u); + escape = true; + break; + } + } + } + else if (u < 127) + { + if (escape) + { + // Close and open the string so there are no clashes. + // + r += '"'; + r += '"'; + + escape = false; + } + + switch (u) + { + case L'"': + { + r += L"\\\""; + break; + } + case L'\\': + { + r += L"\\\\"; + break; + } + default: + { + r += static_cast (u); + break; + } + } + } + else + { + // Unrepresentable character. + // + throw UnrepresentableCharacter (str, i + 1); + } + } + + r += '"'; + + return r; + } + + const unsigned int utf8_first_char_mask[5] = + { + 0x00, 0x00, 0xC0, 0xE0, 0xF0 + }; + + String + strlit_utf8 (String const& str) + { + String r; + size_t n (str.size ()); + + // In most common cases we will have that many chars. + // + r.reserve (n + 2); + + r += '"'; + + bool escape (false); + + for (size_t i (0); i < n; ++i) + { + unsigned int u (Context::unicode_char (str, i)); // May advance i. + + // [128 - ] - UTF-8 + // 127 - \x7F + // [32 - 126] - as is + // [0 - 31] - \X or \xXX + // + + if (u < 32 || u == 127) + { + switch (u) + { + case L'\n': + { + r += L"\\n"; + break; + } + case L'\t': + { + r += L"\\t"; + break; + } + case L'\v': + { + r += L"\\v"; + break; + } + case L'\b': + { + r += L"\\b"; + break; + } + case L'\r': + { + r += L"\\r"; + break; + } + case L'\f': + { + r += L"\\f"; + break; + } + case L'\a': + { + r += L"\\a"; + break; + } + default: + { + r += charlit (u); + escape = true; + break; + } + } + } + else if (u < 127) + { + if (escape) + { + // Close and open the string so there are no clashes. + // + r += '"'; + r += '"'; + + escape = false; + } + + switch (u) + { + case L'"': + { + r += L"\\\""; + break; + } + case L'\\': + { + r += L"\\\\"; + break; + } + default: + { + r += static_cast (u); + break; + } + } + } + else + { + unsigned int count (0); + unsigned int tmp[4]; + + if (u < 0x800) + count = 2; + else if (u < 0x10000) + count = 3; + else if (u < 0x110000) + count = 4; + + switch (count) + { + case 4: + { + tmp[3] = (u | 0x80UL) & 0xBFUL; + u >>= 6; + } + // Fall through. + case 3: + { + tmp[2] = (u | 0x80UL) & 0xBFUL; + u >>= 6; + } + // Fall through. + case 2: + { + tmp[1] = (u | 0x80UL) & 0xBFUL; + u >>= 6; + } + // Fall through. + case 1: + { + tmp[0] = u | utf8_first_char_mask[count]; + break; + } + default: + assert (false); + } + + for (unsigned int j (0); j < count; ++j) + r += charlit (tmp[j]); + + escape = true; + } + } + + r += '"'; + + return r; + } + + String + strlit_iso8859_1 (String const& str) + { + String r; + size_t n (str.size ()); + + // In most common cases we will have that many chars. + // + r.reserve (n + 2); + + r += '"'; + + bool escape (false); + + for (size_t i (0); i < n; ++i) + { + unsigned int u (Context::unicode_char (str, i)); // May advance i. + + // [256 - ] - unrepresentable + // [127 - 255] - \xXX + // [32 - 126] - as is + // [0 - 31] - \X or \xXX + // + + if (u < 32) + { + switch (u) + { + case L'\n': + { + r += L"\\n"; + break; + } + case L'\t': + { + r += L"\\t"; + break; + } + case L'\v': + { + r += L"\\v"; + break; + } + case L'\b': + { + r += L"\\b"; + break; + } + case L'\r': + { + r += L"\\r"; + break; + } + case L'\f': + { + r += L"\\f"; + break; + } + case L'\a': + { + r += L"\\a"; + break; + } + default: + { + r += charlit (u); + escape = true; + break; + } + } + } + else if (u < 127) + { + if (escape) + { + // Close and open the string so there are no clashes. + // + r += '"'; + r += '"'; + + escape = false; + } + + switch (u) + { + case L'"': + { + r += L"\\\""; + break; + } + case L'\\': + { + r += L"\\\\"; + break; + } + default: + { + r += static_cast (u); + break; + } + } + } + else if (u < 256) + { + r += charlit (u); + escape = true; + } + else + { + // Unrepresentable character. + // + throw UnrepresentableCharacter (str, i + 1); + } + } + + r += '"'; + + return r; + } + + String + strlit_utf32 (String const& str) + { + String r; + size_t n (str.size ()); + + // In most common cases we will have that many chars. + // + r.reserve (n + 3); + + r += L"L\""; + + bool escape (false); + + for (size_t i (0); i < n; ++i) + { + unsigned int u (Context::unicode_char (str, i)); // May advance i. + + // [128 - ] - \xUUUUUUUU + // 127 - \x7F + // [32 - 126] - as is + // [0 - 31] - \X or \xXX + // + + if (u < 32 || u == 127) + { + switch (u) + { + case L'\n': + { + r += L"\\n"; + break; + } + case L'\t': + { + r += L"\\t"; + break; + } + case L'\v': + { + r += L"\\v"; + break; + } + case L'\b': + { + r += L"\\b"; + break; + } + case L'\r': + { + r += L"\\r"; + break; + } + case L'\f': + { + r += L"\\f"; + break; + } + case L'\a': + { + r += L"\\a"; + break; + } + default: + { + r += charlit (u); + escape = true; + break; + } + } + } + else if (u < 127) + { + if (escape) + { + // Close and open the string so there are no clashes. C++11 + // requires a space between " and L. + // + r += L"\" L\""; + escape = false; + } + + switch (u) + { + case L'"': + { + r += L"\\\""; + break; + } + case L'\\': + { + r += L"\\\\"; + break; + } + default: + { + r += static_cast (u); + break; + } + } + } + else + { + r += charlit (u); + escape = true; + } + } + + r += '"'; + + return r; + } + + String Context:: + strlit (String const& str) + { + // First see if we have a custom mapping. + // + assert (string_literal_map != 0); + StringLiteralMap::const_iterator i (string_literal_map->find (str)); + + if (i != string_literal_map->end ()) + return i->second; + + if (char_type == L"char") + { + if (char_encoding == L"utf8") + return strlit_utf8 (str); + else if (char_encoding == L"iso8859-1") + return strlit_iso8859_1 (str); + else + { + // For LCP, custom, and other unknown encodings, use ASCII. + // + return strlit_ascii (str); + } + } + else + return strlit_utf32 (str); + } + + String Context:: + comment (String const& str) + { + String r; + + wchar_t const* s (str.c_str ()); + size_t size (str.size ()); + + // In most common cases we will have that many chars. + // + r.reserve (size); + + for (wchar_t const* p (s); p < s + size; ++p) + { + unsigned int u (unicode_char (p)); // May advance p. + + // We are going to treat \v, \f and \n as unrepresentable + // here even though they can be present in C++ source code. + // + if (u > 127 || (u < 32 && u != '\t')) + r += L'?'; + else + r += static_cast (u); + } + + return r; + } + + String Context:: + process_include_path (String const& name) const + { + String path (String (options.include_prefix ()) + name); + bool trace (options.include_regex_trace ()); + + if (trace) + wcerr << "include: '" << path << "'" << endl; + + String r; + bool found (false); + + for (RegexMapping::const_reverse_iterator e (include_mapping.rbegin ()); + e != include_mapping.rend (); ++e) + { + if (trace) + wcerr << "try: '" << e->regex () << "' : "; + + if (e->match (path)) + { + r = e->replace (path); + found = true; + + if (trace) + wcerr << "'" << r << "' : "; + } + + if (trace) + wcerr << (found ? '+' : '-') << endl; + + if (found) + break; + } + + if (!found) + r = path; + + if (!r.empty () && r[0] != L'"' && r[0] != L'<') + { + wchar_t op (options.include_with_brackets () ? L'<' : L'"'); + wchar_t cl (options.include_with_brackets () ? L'>' : L'"'); + r = op + r + cl; + } + + return r; + } + + // Namespace + // + + void Namespace:: + pre (Type& n) + { + String ns (ctx_.ns_name (n)); + + String::size_type b (0), e; + + if (st_) + st_->enter (n, L"", ns ? false : true); + + do + { + e = ns.find (L"::", b); + + String name (ns, b, e == ns.npos ? e : e - b); + + if (!name.empty ()) + { + if (st_) + st_->enter (n, name, e == ns.npos); + + ctx_.os << "namespace " << name << "{"; + } + + b = e; + + if (b == ns.npos) + break; + + b += 2; + + } while (true); + } + + void Namespace:: + post (Type& n) + { + String ns (ctx_.ns_name (n)); + + String::size_type b (0), e; + + do + { + e = ns.find (L"::", b); + + String name (ns, b, e == ns.npos ? e : e - b); + + if (!name.empty ()) + { + ctx_.os << "}"; + + if (st_) + st_->leave (); + } + + + b = e; + + if (b == ns.npos) + break; + + b += 2; + + } + while (true); + + if (st_) + st_->leave (); + } +} -- cgit v1.1