From f0510d2f90467de8e8f260b47d79a9baaf9bef17 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Thu, 17 Sep 2009 07:15:29 +0200 Subject: Start tracking XSD with git --- libxsd/xsd/cxx/tree/parsing.txx | 915 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 915 insertions(+) create mode 100644 libxsd/xsd/cxx/tree/parsing.txx (limited to 'libxsd/xsd/cxx/tree/parsing.txx') diff --git a/libxsd/xsd/cxx/tree/parsing.txx b/libxsd/xsd/cxx/tree/parsing.txx new file mode 100644 index 0000000..ebbf17b --- /dev/null +++ b/libxsd/xsd/cxx/tree/parsing.txx @@ -0,0 +1,915 @@ +// file : xsd/cxx/tree/parsing.txx +// author : Boris Kolpackov +// copyright : Copyright (c) 2005-2009 Code Synthesis Tools CC +// license : GNU GPL v2 + exceptions; see accompanying LICENSE file + +#include + +#include // trim + +#include // xml::{string, transcode} +#include // xml::{prefix, uq_name} +#include // xml::bits::{xml_prefix, + // xml_namespace} + +#include // no_prefix_mapping +#include +#include +#include +#include // text_content + +namespace xsd +{ + namespace cxx + { + namespace tree + { + // Note that most of the types implemented here (except string, + // normalizedString, and base64Binary) cannot have whitespaces + // in the value. As a result we don't need to waste time collapsing + // whitespaces. All we need to do is trim the string representation + // which can be done without copying. 
+      //
+
+      // type
+      //
+      // Parsing constructors for _type. Each one stores the container
+      // pointer c in container_ and starts with dom_info_ set to 0. The
+      // element and attribute versions then, only if flags::keep_dom is
+      // set in f, ask dom_info_factory to create a DOM association for
+      // the node and hand it over to dom_info_.
+      //
+      // NOTE(review): in this copy the template parameter lists after
+      // `template` and the template arguments of class names appear to
+      // be missing -- confirm against the original file before compiling.
+      //
+      inline _type::
+      _type (const xercesc::DOMElement& e, flags f, container* c)
+          : dom_info_ (0), container_ (c)
+      {
+        if (f & flags::keep_dom)
+        {
+          // The third argument tells the factory whether no container
+          // was supplied (c == 0).
+          //
+          std::auto_ptr r (
+            dom_info_factory::create (e, *this, c == 0));
+          dom_info_ = r;
+        }
+      }
+
+      inline _type::
+      _type (const xercesc::DOMAttr& a, flags f, container* c)
+          : dom_info_ (0), container_ (c)
+      {
+        if (f & flags::keep_dom)
+        {
+          std::auto_ptr r (dom_info_factory::create (a, *this));
+          dom_info_ = r;
+        }
+      }
+
+      // String (list item) version: the string, the element pointer, and
+      // the flags are all ignored; only the container is recorded.
+      //
+      template
+      inline _type::
+      _type (const std::basic_string&,
+             const xercesc::DOMElement*,
+             flags,
+             container* c)
+          : dom_info_ (0), // List elements don't have associated DOM nodes.
+            container_ (c)
+      {
+      }
+
+      // simple_type
+      //
+      // Parsing constructors for simple_type. All three simply forward
+      // their arguments to the corresponding constructor of the base B.
+      //
+      template
+      inline simple_type::
+      simple_type (const xercesc::DOMElement& e, flags f, container* c)
+          : B (e, f, c)
+      {
+      }
+
+      template
+      inline simple_type::
+      simple_type (const xercesc::DOMAttr& a, flags f, container* c)
+          : B (a, f, c)
+      {
+      }
+
+      template
+      template
+      inline simple_type::
+      simple_type (const std::basic_string& s,
+                   const xercesc::DOMElement* e,
+                   flags f,
+                   container* c)
+          : B (s, e, f, c)
+      {
+      }
+
+      // fundamental_base
+      //
+      // Parsing constructors for fundamental_base. Each forwards its
+      // arguments to the base B, sets facet_table_ to 0, and initializes
+      // the wrapped value x_ with the result of traits::create() called
+      // with the same arguments.
+      //
+      template
+      fundamental_base::
+      fundamental_base (const xercesc::DOMElement& e, flags f, container* c)
+          : B (e, f, c),
+            facet_table_ (0),
+            x_ (traits::create (e, f, c))
+      {
+      }
+
+      template
+      fundamental_base::
+      fundamental_base (const xercesc::DOMAttr& a, flags f, container* c)
+          : B (a, f, c),
+            facet_table_ (0),
+            x_ (traits::create (a, f, c))
+      {
+      }
+
+      template
+      fundamental_base::
+      fundamental_base (const std::basic_string& s,
+                        const xercesc::DOMElement* e,
+                        flags f,
+                        container* c)
+          : B (s, e, f, c),
+            facet_table_ (0),
+            x_ (traits::create (s, e, f, c))
+      {
+      }
+
+
+      // Parsing c-tors for list.
+      //
+
+      namespace bits
+      {
+        // Find first non-space character.
+        //
+        // Scans s starting at index pos and skips characters equal to
+        // space (0x20), carriage return (0x0D), tab (0x09), or line feed
+        // (0x0A). Returns the index of the first other character, or
+        // npos if the scan reaches size first.
+        //
+        // NOTE(review): in this copy the template parameter lists after
+        // `template` and the template arguments of class names appear to
+        // be missing -- confirm against the original file before compiling.
+        //
+        template
+        typename std::basic_string::size_type
+        find_ns (const C* s,
+                 typename std::basic_string::size_type size,
+                 typename std::basic_string::size_type pos)
+        {
+          while (pos < size &&
+                 (s[pos] == C (0x20) || // space
+                  s[pos] == C (0x0D) || // carriage return
+                  s[pos] == C (0x09) || // tab
+                  s[pos] == C (0x0A)))
+            ++pos;
+
+          return pos < size ? pos : std::basic_string::npos;
+        }
+
+        // Find first space character.
+        //
+        // The mirror image of find_ns: scans s starting at index pos and
+        // returns the index of the first space, carriage return, tab, or
+        // line feed, or npos if the scan reaches size first.
+        //
+        template
+        typename std::basic_string::size_type
+        find_s (const C* s,
+                typename std::basic_string::size_type size,
+                typename std::basic_string::size_type pos)
+        {
+          while (pos < size &&
+                 s[pos] != C (0x20) && // space
+                 s[pos] != C (0x0D) && // carriage return
+                 s[pos] != C (0x09) && // tab
+                 s[pos] != C (0x0A))
+            ++pos;
+
+          return pos < size ? pos : std::basic_string::npos;
+        }
+      }
+
+      // Individual items of the list have no DOM association. Therefore
+      // I clear keep_dom from flags.
+      //
+
+      // First family of list constructors. The element version parses
+      // text_content (e), the attribute version parses the transcoded
+      // attribute value with the owner element as parent, and the string
+      // version parses s as given. All of them delegate to init ().
+      //
+      template
+      list::
+      list (const xercesc::DOMElement& e, flags f, container* c)
+          : sequence (flags (f & ~flags::keep_dom), c) // ambiguous
+      {
+        init (text_content (e), &e);
+      }
+
+      template
+      list::
+      list (const xercesc::DOMAttr& a, flags f, container* c)
+          : sequence (flags (f & ~flags::keep_dom), c) // ambiguous
+      {
+        init (xml::transcode (a.getValue ()), a.getOwnerElement ());
+      }
+
+      template
+      list::
+      list (const std::basic_string& s,
+            const xercesc::DOMElement* e,
+            flags f,
+            container* c)
+          : sequence (flags (f & ~flags::keep_dom), c) // ambiguous
+      {
+        init (s, e);
+      }
+
+      // Splits s into whitespace-separated items. For each item a new T
+      // is constructed from the item text, parent, this->flags_, and
+      // this->container_, and appended to this->v_. An empty string, or
+      // one consisting only of whitespace, adds nothing.
+      //
+      template
+      void list::
+      init (const std::basic_string& s, const xercesc::DOMElement* parent)
+      {
+        if (s.size () == 0)
+          return;
+
+        using std::basic_string;
+        typedef typename sequence::ptr ptr;
+        typedef typename basic_string::size_type size_type;
+
+        const C* data (s.c_str ());
+        size_type size (s.size ());
+
+        // Traverse the data while logically collapsing spaces.
+        //
+        // Here i is the start of the current item and j is the position
+        // of the whitespace that terminates it (npos for the last item).
+        //
+        for (size_type i (bits::find_ns (data, size, 0));
+             i != basic_string::npos;)
+        {
+          size_type j (bits::find_s (data, size, i));
+
+          if (j != basic_string::npos)
+          {
+            ptr r (
+              new T (basic_string (data + i, j - i),
+                     parent,
+                     this->flags_,
+                     this->container_));
+
+            this->v_.push_back (r);
+
+            i = bits::find_ns (data, size, j);
+          }
+          else
+          {
+            // Last element.
+            //
+            ptr r (
+              new T (basic_string (data + i, size - i),
+                     parent,
+                     this->flags_,
+                     this->container_));
+
+            this->v_.push_back (r);
+
+            break;
+          }
+        }
+      }
+
+      // Second family of list constructors. They mirror the first family
+      // but their init () creates items with traits::create () instead of
+      // allocating a T.
+      //
+      // NOTE(review): presumably this is a different specialization of
+      // list than the one above; the distinguishing template arguments
+      // are not visible in this copy -- confirm.
+      //
+      template
+      list::
+      list (const xercesc::DOMElement& e, flags f, container* c)
+          : sequence (flags (f & ~flags::keep_dom), c) // ambiguous
+      {
+        init (text_content (e), &e);
+      }
+
+      template
+      inline list::
+      list (const xercesc::DOMAttr& a, flags f, container* c)
+          : sequence (flags (f & ~flags::keep_dom), c) // ambiguous
+      {
+        init (xml::transcode (a.getValue ()), a.getOwnerElement ());
+      }
+
+      template
+      inline list::
+      list (const std::basic_string& s,
+            const xercesc::DOMElement* parent,
+            flags f,
+            container* c)
+          : sequence (flags (f & ~flags::keep_dom), c) // ambiguous
+      {
+        init (s, parent);
+      }
+
+      // Splits s into whitespace-separated items in the same way as the
+      // init () above, but each item is produced by traits::create ()
+      // (called with the item text, parent, and 0 for both the flags and
+      // the container) and added with push_back ().
+      //
+      template
+      inline void list::
+      init (const std::basic_string& s, const xercesc::DOMElement* parent)
+      {
+        if (s.size () == 0)
+          return;
+
+        using std::basic_string;
+        typedef typename basic_string::size_type size_type;
+
+        const C* data (s.c_str ());
+        size_type size (s.size ());
+
+        // Traverse the data while logically collapsing spaces.
+        //
+        for (size_type i (bits::find_ns (data, size, 0));
+             i != basic_string::npos;)
+        {
+          size_type j (bits::find_s (data, size, i));
+
+          if (j != basic_string::npos)
+          {
+            push_back (
+              traits::create (
+                basic_string (data + i, j - i), parent, 0, 0));
+
+            i = bits::find_ns (data, size, j);
+          }
+          else
+          {
+            // Last element.
+            //
+            push_back (
+              traits::create (
+                basic_string (data + i, size - i), parent, 0, 0));
+
+            break;
+          }
+        }
+      }
+
+
+      // Parsing c-tors for built-in types.
+      //
+
+
+      // string
+      //
+      // The value (base_type) is initialized from the element's text
+      // content, the transcoded attribute value, or the string s, with
+      // no whitespace processing at this level.
+      //
+      // NOTE(review): in this copy the template parameter lists after
+      // `template` and the template arguments of class names appear to
+      // be missing -- confirm against the original file before compiling.
+      //
+      template
+      string::
+      string (const xercesc::DOMElement& e, flags f, container* c)
+          : B (e, f, c),
+            base_type (text_content (e))
+      {
+      }
+
+      template
+      string::
+      string (const xercesc::DOMAttr& a, flags f, container* c)
+          : B (a, f, c),
+            base_type (xml::transcode (a.getValue ()))
+      {
+      }
+
+      template
+      string::
+      string (const std::basic_string& s,
+              const xercesc::DOMElement* e,
+              flags f,
+              container* c)
+          : B (s, e, f, c), base_type (s)
+      {
+      }
+
+
+      // normalized_string
+      //
+      // Each constructor forwards to base_type and then calls
+      // normalize () on the resulting value.
+      //
+      template
+      normalized_string::
+      normalized_string (const xercesc::DOMElement& e, flags f, container* c)
+          : base_type (e, f, c)
+      {
+        normalize ();
+      }
+
+      template
+      normalized_string::
+      normalized_string (const xercesc::DOMAttr& a, flags f, container* c)
+          : base_type (a, f, c)
+      {
+        normalize ();
+      }
+
+      template
+      normalized_string::
+      normalized_string (const std::basic_string& s,
+                         const xercesc::DOMElement* e,
+                         flags f,
+                         container* c)
+          : base_type (s, e, f, c)
+      {
+        normalize ();
+      }
+
+      // Replaces, in place, every carriage return, tab, and line feed
+      // with a space (0x20). The length of the string does not change.
+      //
+      template
+      void normalized_string::
+      normalize ()
+      {
+        typedef typename std::basic_string::size_type size_type;
+
+        size_type size (this->size ());
+
+        for (size_type i (0); i < size; ++i)
+        {
+          C& c ((*this)[i]);
+
+          if (c == C (0x0D) || // carriage return
+              c == C (0x09) || // tab
+              c == C (0x0A))
+            c = C (0x20);
+        }
+      }
+
+
+      // token
+      //
+      // Each constructor forwards to base_type and then calls
+      // collapse () on the resulting value.
+      //
+      template
+      token::
+      token (const xercesc::DOMElement& e, flags f, container* c)
+          : base_type (e, f, c)
+      {
+        collapse ();
+      }
+
+      template
+      token::
+      token (const xercesc::DOMAttr& a, flags f, container* c)
+          : base_type (a, f, c)
+      {
+        collapse ();
+      }
+
+      template
+      token::
+      token (const std::basic_string& s,
+             const xercesc::DOMElement* e,
+             flags f,
+             container* c)
+          : base_type (s, e, f, c)
+      {
+        collapse ();
+      }
+
+      // Collapses spaces in place: leading and trailing spaces are
+      // removed and every interior run of spaces becomes a single space.
+      // Only 0x20 is treated as a space here.
+      //
+      template
+      void token::
+      collapse ()
+      {
+        // We have all whitespace normalized by our base. We just
+        // need to collapse them.
+        //
+        typedef typename std::basic_string::size_type size_type;
+
+        // i is the read position and j is the write position (j <= i).
+        // subs is true while a run of spaces is pending; trim is true
+        // until the first non-space character has been seen.
+        //
+        size_type size (this->size ()), j (0);
+        bool subs (false), trim (true);
+
+        for (size_type i (0); i < size; ++i)
+        {
+          C c ((*this)[i]);
+
+          if (c == C (0x20))
+          {
+            subs = true;
+          }
+          else
+          {
+            if (subs)
+            {
+              subs = false;
+
+              // Emit a single space for the pending run unless it was
+              // a leading run.
+              //
+              if (!trim)
+                (*this)[j++] = C (0x20);
+            }
+
+            if (trim)
+              trim = false;
+
+            (*this)[j++] = c;
+          }
+        }
+
+        // A trailing run of spaces is never written, so resizing to j
+        // drops it.
+        //
+        this->resize (j);
+      }
+
+
+      // nmtoken
+      //
+      // The constructors only forward to base_type.
+      //
+      template
+      nmtoken::
+      nmtoken (const xercesc::DOMElement& e, flags f, container* c)
+          : base_type (e, f, c)
+      {
+      }
+
+      template
+      nmtoken::
+      nmtoken (const xercesc::DOMAttr& a, flags f, container* c)
+          : base_type (a, f, c)
+      {
+      }
+
+      template
+      nmtoken::
+      nmtoken (const std::basic_string& s,
+               const xercesc::DOMElement* e,
+               flags f,
+               container* c)
+          : base_type (s, e, f, c)
+      {
+      }
+
+
+      // nmtokens
+      //
+      // The constructors pass the same arguments to both B and
+      // base_type.
+      //
+      template
+      nmtokens::
+      nmtokens (const xercesc::DOMElement& e, flags f, container* c)
+          : B (e, f, c), base_type (e, f, c)
+      {
+      }
+
+      template
+      nmtokens::
+      nmtokens (const xercesc::DOMAttr& a, flags f, container* c)
+          : B (a, f, c), base_type (a, f, c)
+      {
+      }
+
+      template
+      nmtokens::
+      nmtokens (const std::basic_string& s,
+                const xercesc::DOMElement* e,
+                flags f,
+                container* c)
+          : B (s, e, f, c), base_type (s, e, f, c)
+      {
+      }
+
+
+      // name
+      //
+      // The constructors only forward to base_type.
+      //
+      template
+      name::
+      name (const xercesc::DOMElement& e, flags f, container* c)
+          : base_type (e, f, c)
+      {
+      }
+
+      template
+      name::
+      name (const xercesc::DOMAttr& a, flags f, container* c)
+          : base_type (a, f, c)
+      {
+      }
+
+      template
+      name::
+      name (const std::basic_string& s,
+            const xercesc::DOMElement* e,
+            flags f,
+            container* c)
+          : base_type (s, e, f, c)
+      {
+      }
+
+
+      // ncname
+      //
+      // The constructors only forward to base_type.
+      //
+      template
+      ncname::
+      ncname (const xercesc::DOMElement& e, flags f, container* c)
+          : base_type (e, f, c)
+      {
+      }
+
+      template
+      ncname::
+      ncname (const xercesc::DOMAttr& a, flags f, container* c)
+          : base_type (a, f, c)
+      {
+      }
+
+      template
+      ncname::
+      ncname (const std::basic_string& s,
+              const xercesc::DOMElement* e,
+              flags f,
+              container* c)
+          : base_type (s, e, f, c)
+      {
+      }
+
+
+      // language
+      //
+      // The constructors only forward to base_type.
+      //
+      template
+      language::
+      language (const xercesc::DOMElement& e, flags f, container* c)
+          : base_type (e, f, c)
+      {
+      }
+
+      template
+      language::
+      language (const xercesc::DOMAttr& a, flags f, container* c)
+          : base_type (a, f, c)
+      {
+      }
+
+      template
+      language::
+      language (const std::basic_string& s,
+                const xercesc::DOMElement* e,
+                flags f,
+                container* c)
+          : base_type (s, e, f, c)
+      {
+      }
+
+
+      // id
+      //
+      // In addition to forwarding to base_type, each constructor
+      // initializes identity_ from *this and calls register_id ().
+      //
+      template
+      id::
+      id (const xercesc::DOMElement& e, flags f, container* c)
+          : base_type (e, f, c), identity_ (*this)
+      {
+        register_id ();
+      }
+
+      template
+      id::
+      id (const xercesc::DOMAttr& a, flags f, container* c)
+          : base_type (a, f, c), identity_ (*this)
+      {
+        register_id ();
+      }
+
+      template
+      id::
+      id (const std::basic_string& s,
+          const xercesc::DOMElement* e,
+          flags f,
+          container* c)
+          : base_type (s, e, f, c), identity_ (*this)
+      {
+        register_id ();
+      }
+
+
+      // idref
+      //
+      // Each constructor forwards to base_type and initializes
+      // identity_ from *this; unlike id, nothing is registered here.
+      //
+      template
+      idref::
+      idref (const xercesc::DOMElement& e, flags f, container* c)
+          : base_type (e, f, c), identity_ (*this)
+      {
+      }
+
+      template
+      idref::
+      idref (const xercesc::DOMAttr& a, flags f, container* c)
+          : base_type (a, f , c), identity_ (*this)
+      {
+      }
+
+      template
+      idref::
+      idref (const std::basic_string& s,
+             const xercesc::DOMElement* e,
+             flags f,
+             container* c)
+          : base_type (s, e, f, c), identity_ (*this)
+      {
+      }
+
+
+
+      // idrefs
+      //
+      // The constructors pass the same arguments to both B and
+      // base_type.
+      //
+      template
+      idrefs::
+      idrefs (const xercesc::DOMElement& e, flags f, container* c)
+          : B (e, f, c), base_type (e, f, c)
+      {
+      }
+
+      template
+      idrefs::
+      idrefs (const xercesc::DOMAttr& a, flags f, container* c)
+          : B (a, f, c), base_type (a, f, c)
+      {
+      }
+
+      template
+      idrefs::
+      idrefs (const std::basic_string& s,
+              const xercesc::DOMElement* e,
+              flags f,
+              container* c)
+          : B (s, e, f, c), base_type (s, e, f, c)
+      {
+      }
+
+
+      // uri
+      //
+      // The value (base_type) is the trimmed element text content,
+      // transcoded attribute value, or string s.
+      //
+      template
+      uri::
+      uri (const xercesc::DOMElement& e, flags f, container* c)
+          : B (e, f, c),
+            base_type (trim (text_content (e)))
+      {
+      }
+
+      template
+      uri::
+      uri (const xercesc::DOMAttr& a, flags f, container* c)
+          : B (a, f, c),
+            base_type (trim (xml::transcode (a.getValue ())))
+      {
+      }
+
+      template
+      uri::
+      uri (const std::basic_string& s,
+           const xercesc::DOMElement* e,
+           flags f,
+           container* c)
+          : B (s, e, f, c), base_type (trim (s))
+      {
+      }
+
+
+      // qname
+      //
+      // Each constructor trims the lexical value, sets ns_ to the
+      // namespace that resolve () finds for its prefix, and sets name_
+      // to the unqualified name returned by xml::uq_name (). The
+      // element used for the lookup is e itself, the attribute's owner
+      // element, or the element pointer passed in.
+      //
+      template
+      qname::
+      qname (const xercesc::DOMElement& e, flags f, container* c)
+          : B (e, f, c)
+      {
+        std::basic_string v (trim (text_content (e)));
+        ns_ = resolve (v, &e);
+        name_ = xml::uq_name (v);
+      }
+
+      template
+      qname::
+      qname (const xercesc::DOMAttr& a, flags f, container* c)
+          : B (a, f, c)
+      {
+        std::basic_string v (trim (xml::transcode (a.getValue ())));
+        ns_ = resolve (v, a.getOwnerElement ());
+        name_ = xml::uq_name (v);
+      }
+
+      template
+      qname::
+      qname (const std::basic_string& s,
+             const xercesc::DOMElement* e,
+             flags f,
+             container* c)
+          : B (s, e, f, c)
+      {
+        std::basic_string v (trim (s));
+        ns_ = resolve (v, e);
+        name_ = xml::uq_name (v);
+      }
+
+      // Maps the prefix of the qualified name s to a namespace using
+      // the element e. With a non-null e: the 'xml' prefix yields
+      // xml::bits::xml_namespace (); otherwise the result of
+      // e->lookupNamespaceURI () is returned if it is non-null, and an
+      // empty string is returned if the lookup fails for an empty
+      // prefix. In every other case, including a null e, this throws
+      // no_prefix_mapping for the prefix.
+      //
+      template
+      uri qname::
+      resolve (const std::basic_string& s, const xercesc::DOMElement* e)
+      {
+        std::basic_string p (xml::prefix (s));
+
+        if (e)
+        {
+          // This code is copied verbatim from xml/dom/elements.hxx.
+          //
+
+          // 'xml' prefix requires special handling and Xerces folks refuse
+          // to handle this in DOM so I have to do it myself.
+          //
+          if (p == xml::bits::xml_prefix ())
+            return xml::bits::xml_namespace ();
+
+          // An empty prefix is looked up as a null pointer.
+          //
+          const XMLCh* xns (
+            e->lookupNamespaceURI (
+              p.empty () ? 0 : xml::string (p).c_str ()));
+
+          if (xns != 0)
+            return xml::transcode (xns);
+          else if (p.empty ())
+            return std::basic_string ();
+        }
+
+        throw no_prefix_mapping (p);
+      }
+
+
+      // base64_binary
+      //
+      // We are not doing whitespace collapsing since the decode
+      // functions can handle it like this.
+      //
+      // Each constructor trims the lexical value, converts it with
+      // xml::string, and passes the resulting buffer to decode ().
+      //
+      // NOTE(review): in this copy the template parameter lists after
+      // `template` and the template arguments of class names appear to
+      // be missing -- confirm against the original file before compiling.
+      //
+      template
+      base64_binary::
+      base64_binary (const xercesc::DOMElement& e, flags f, container* c)
+          : B (e, f, c)
+      {
+        // This implementation is not optimal.
+        //
+        std::basic_string str (trim (text_content (e)));
+        decode (xml::string (str).c_str ());
+      }
+
+      template
+      base64_binary::
+      base64_binary (const xercesc::DOMAttr& a, flags f, container* c)
+          : B (a, f, c)
+      {
+        std::basic_string str (trim (xml::transcode (a.getValue ())));
+        decode (xml::string (str).c_str ());
+      }
+
+      template
+      base64_binary::
+      base64_binary (const std::basic_string& s,
+                     const xercesc::DOMElement* e,
+                     flags f,
+                     container* c)
+          : B (s, e, f, c)
+      {
+        std::basic_string str (trim (s));
+        decode (xml::string (str).c_str ());
+      }
+
+
+      // hex_binary
+      //
+      // Same shape as the base64_binary constructors: trim the lexical
+      // value, convert it with xml::string, and pass it to decode ().
+      //
+      template
+      hex_binary::
+      hex_binary (const xercesc::DOMElement& e, flags f, container* c)
+          : B (e, f, c)
+      {
+        // This implementation is not optimal.
+        //
+        std::basic_string str (trim (text_content (e)));
+        decode (xml::string (str).c_str ());
+      }
+
+      template
+      hex_binary::
+      hex_binary (const xercesc::DOMAttr& a, flags f, container* c)
+          : B (a, f, c)
+      {
+        std::basic_string str (trim (xml::transcode (a.getValue ())));
+        decode (xml::string (str).c_str ());
+      }
+
+      template
+      hex_binary::
+      hex_binary (const std::basic_string& s,
+                  const xercesc::DOMElement* e,
+                  flags f,
+                  container* c)
+          : B (s, e, f, c)
+      {
+        std::basic_string str (trim (s));
+        decode (xml::string (str).c_str ());
+      }
+
+      // entity
+      //
+      // The constructors only forward to base_type.
+      //
+      template
+      entity::
+      entity (const xercesc::DOMElement& e, flags f, container* c)
+          : base_type (e, f, c)
+      {
+      }
+
+      template
+      entity::
+      entity (const xercesc::DOMAttr& a, flags f, container* c)
+          : base_type (a, f, c)
+      {
+      }
+
+      template
+      entity::
+      entity (const std::basic_string& s,
+              const xercesc::DOMElement* e,
+              flags f,
+              container* c)
+          : base_type (s, e, f, c)
+      {
+      }
+
+
+      // entities
+      //
+      // The constructors pass the same arguments to both B and
+      // base_type.
+      //
+      template
+      entities::
+      entities (const xercesc::DOMElement& e, flags f, container* c)
+          : B (e, f, c), base_type (e, f, c)
+      {
+      }
+
+      template
+      entities::
+      entities (const xercesc::DOMAttr& a, flags f, container* c)
+          : B (a, f, c), base_type (a, f, c)
+      {
+      }
+
+      template
+      entities::
+      entities (const std::basic_string& s,
+                const xercesc::DOMElement* e,
+                flags f,
+                container* c)
+          : B (s, e, f, c), base_type (s, e, f, c)
+      {
+      }
+    }
+  }
+}
-- cgit v1.1