diff options
author | Boris Kolpackov <boris@codesynthesis.com> | 2014-05-14 21:29:29 -0700 |
---|---|---|
committer | Boris Kolpackov <boris@codesynthesis.com> | 2014-05-14 21:29:29 -0700 |
commit | e897aa91a2a5c68a2f795f6a0a995600f13a85f8 (patch) | |
tree | 75a17430ba9cec822651b881003f66da8c89d7f9 /xml/parser | |
parent | 3bf332a7b77e9ce9e5eb0a1dfd5f64f238f4f17f (diff) |
Convert to extension-less headers for API
Diffstat (limited to 'xml/parser')
-rw-r--r-- | xml/parser | 473 |
1 files changed, 473 insertions, 0 deletions
diff --git a/xml/parser b/xml/parser new file mode 100644 index 0000000..a1b6250 --- /dev/null +++ b/xml/parser @@ -0,0 +1,473 @@ +// file : xml/parser -*- C++ -*- +// copyright : Copyright (c) 2013-2014 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#ifndef XML_PARSER +#define XML_PARSER + +#include <xml/details/pre.hxx> + +#include <map> +#include <vector> +#include <string> +#include <iosfwd> +#include <cstddef> // std::size_t + +#include <xml/details/config.hxx> // LIBSTUDXML_EXTERNAL_EXPAT + +#ifndef LIBSTUDXML_EXTERNAL_EXPAT +# include <xml/details/expat/expat.h> +#else +# include <expat.h> +#endif + +// We only support UTF-8 Expat. +// +#ifdef XML_UNICODE +# error UTF-16 expat (XML_UNICODE defined) is not supported +#endif + +#include <xml/forward> +#include <xml/qname> +#include <xml/content> +#include <xml/exception> + +#include <xml/details/export.hxx> + +namespace xml +{ + class parser; + + // Exception used by the parser to report parsing errors. Carries a + // name, the line and column position, and a description of the error, + // each available via the accessor of the same name. + // + struct LIBSTUDXML_EXPORT parsing: exception + { + virtual + ~parsing () throw (); + + parsing (const std::string& name, + unsigned long long line, + unsigned long long column, + const std::string& description); + + parsing (const parser&, const std::string& description); + + const std::string& + name () const {return name_;} + + unsigned long long + line () const {return line_;} + + unsigned long long + column () const {return column_;} + + const std::string& + description () const {return description_;} + + virtual const char* + what () const throw (); + + private: + void + init (); + + private: + std::string name_; + unsigned long long line_; + unsigned long long column_; + std::string description_; + std::string what_; + }; + + // XML parser that delivers the document as a sequence of events (see + // event_type below) retrieved with next() or peek(). The class is + // non-copyable (copy constructor and assignment are private). + // + class LIBSTUDXML_EXPORT parser + { + public: + typedef xml::qname qname_type; + typedef xml::content content_type; + + typedef unsigned short feature_type; + + // If both receive_attributes_event and receive_attributes_map are + // specified, then receive_attributes_event is assumed. 
+ // + static const feature_type receive_elements = 0x0001; + static const feature_type receive_characters = 0x0002; + static const feature_type receive_attributes_map = 0x0004; + static const feature_type receive_attributes_event = 0x0008; + static const feature_type receive_namespace_decls = 0x0010; + + static const feature_type receive_default = receive_elements | + receive_characters | + receive_attributes_map; + + // Parse std::istream. Input name is used in diagnostics to identify + // the document being parsed. + // + // If stream exceptions are enabled then std::ios_base::failure + // exception is used to report io errors (badbit and failbit). + // Otherwise, those are reported as the parsing exception. + // + parser (std::istream&, + const std::string& input_name, + feature_type = receive_default); + + // Parse memory buffer that contains the whole document. Input name + // is used in diagnostics to identify the document being parsed. + // + parser (const void* data, + std::size_t size, + const std::string& input_name, + feature_type = receive_default); + + const std::string& + input_name () const {return iname_;} + + ~parser (); + + private: + parser (const parser&); + parser& operator= (const parser&); + + // Parsing events. + // + public: + enum event_type + { + // If adding new events, also update the stream insertion operator. + // + start_element, + end_element, + start_attribute, + end_attribute, + characters, + start_namespace_decl, + end_namespace_decl, + eof + }; + + event_type + next (); + + // Get the next event and make sure that it's what's expected. If it + // is not, then throw an appropriate parsing exception. 
+ // + void + next_expect (event_type); + + void + next_expect (event_type, const std::string& name); + + void + next_expect (event_type, const qname_type& qname); + + void + next_expect (event_type, const std::string& ns, const std::string& name); + + event_type + peek (); + + // Return the event that was last returned by the call to next() or + // peek(). + // + event_type + event () {return event_;} + + // Event data. + // + public: + const qname_type& qname () const {return *pqname_;} + + const std::string& namespace_ () const {return pqname_->namespace_ ();} + const std::string& name () const {return pqname_->name ();} + const std::string& prefix () const {return pqname_->prefix ();} + + std::string& value () {return *pvalue_;} + const std::string& value () const {return *pvalue_;} + template <typename T> T value () const; + + unsigned long long line () const {return line_;} + unsigned long long column () const {return column_;} + + // Attribute map lookup. If attribute is not found, then the version + // without the default value throws an appropriate parsing exception + // while the version with the default value returns that value. + // + // Note also that there is no attribute(ns,name) version since it + // would conflict with attribute(name,dv) (qualified attributes + // are not very common). + // + // Attribute map is valid throughout the "element level" until + // end_element and not just during start_element. As a special case, + // the map is still valid after peek() that returned end_element until + // this end_element event is retrieved with next(). 
+ // + const std::string& + attribute (const std::string& name) const; + + template <typename T> + T + attribute (const std::string& name) const; + + std::string + attribute (const std::string& name, + const std::string& default_value) const; + + template <typename T> + T + attribute (const std::string& name, const T& default_value) const; + + const std::string& + attribute (const qname_type& qname) const; + + template <typename T> + T + attribute (const qname_type& qname) const; + + std::string + attribute (const qname_type& qname, + const std::string& default_value) const; + + template <typename T> + T + attribute (const qname_type& qname, const T& default_value) const; + + bool + attribute_present (const std::string& name) const; + + bool + attribute_present (const qname_type& qname) const; + + // Low-level attribute map access. Note that this API assumes + // all attributes are handled. + // + struct attribute_value_type + { + std::string value; + mutable bool handled; + }; + + typedef std::map<qname_type, attribute_value_type> attribute_map_type; + + const attribute_map_type& + attribute_map () const; + + // Optional content processing. + // + public: + // Note that you cannot get/set content while peeking. + // + void + content (content_type); + + content_type + content () const; + + // Versions that also set the content. Event type must be start_element. + // + void + next_expect (event_type, const std::string& name, content_type); + + void + next_expect (event_type, const qname_type& qname, content_type); + + void + next_expect (event_type, + const std::string& ns, const std::string& name, + content_type); + + // Helpers for parsing elements with simple content. The first two + // functions assume that start_element has already been parsed. The + // rest parse the complete element, from start to end. + // + // Note also that as with attribute(), there is no (namespace,name) + // overload since it would conflict with (name,default_value). 
+ // + public: + std::string + element (); + + template <typename T> + T + element (); + + std::string + element (const std::string& name); + + std::string + element (const qname_type& qname); + + template <typename T> + T + element (const std::string& name); + + template <typename T> + T + element (const qname_type& qname); + + std::string + element (const std::string& name, const std::string& default_value); + + std::string + element (const qname_type& qname, const std::string& default_value); + + template <typename T> + T + element (const std::string& name, const T& default_value); + + template <typename T> + T + element (const qname_type& qname, const T& default_value); + + // C++11 range-based for support. Generally, the iterator interface + // doesn't make much sense for the parser so for now we have an + // implementation that is just enough to support the range-based for. + // + public: + struct iterator + { + typedef event_type value_type; + + iterator (parser* p = 0, event_type e = eof): p_ (p), e_ (e) {} + value_type operator* () const {return e_;} + iterator& operator++ () {e_ = p_->next (); return *this;} + + // Comparison only makes sense when comparing to end (eof). 
+ // + bool operator== (iterator y) const {return e_ == eof && y.e_ == eof;} + bool operator!= (iterator y) const {return !(*this == y);} + + private: + parser* p_; + event_type e_; + }; + + // begin() retrieves the first event by calling next(); end() is the + // eof sentinel that iteration is compared against. + // + iterator begin () {return iterator (this, next ());} + iterator end () {return iterator (this, eof);} + + // Static callbacks declared with the Expat calling convention (XMLCALL). + // NOTE(review): presumably registered as the Expat handlers in init() -- + // confirm in the implementation. + // + private: + static void XMLCALL + start_element_ (void*, const XML_Char*, const XML_Char**); + + static void XMLCALL + end_element_ (void*, const XML_Char*); + + static void XMLCALL + characters_ (void*, const XML_Char*, int); + + static void XMLCALL + start_namespace_decl_ (void*, const XML_Char*, const XML_Char*); + + static void XMLCALL + end_namespace_decl_ (void*, const XML_Char*); + + private: + void + init (); + + event_type + next_ (bool peek); + + event_type + next_body (); + + void + handle_error (); + + private: + // If size_ is 0, then data is std::istream. Otherwise, it is a buffer. + // + union + { + std::istream* is; + const void* buf; + } data_; + + std::size_t size_; + + const std::string iname_; + feature_type feature_; + + XML_Parser p_; + std::size_t depth_; + bool accumulate_; // Whether we are accumulating character content. + enum {state_next, state_peek} state_; + event_type event_; + event_type queue_; + + qname_type qname_; + std::string value_; + + // These are used to avoid copying when we are handling attributes + // and namespace decls. + // + const qname_type* pqname_; + std::string* pvalue_; + + unsigned long long line_; + unsigned long long column_; + + // Attributes as events. + // + struct attribute_type + { + qname_type qname; + std::string value; + }; + + typedef std::vector<attribute_type> attributes; + + attributes attr_; + attributes::size_type attr_i_; // Index of the current attribute. + + // Namespace declarations. + // + typedef std::vector<qname_type> namespace_decls; + + namespace_decls start_ns_; + namespace_decls::size_type start_ns_i_; // Index of the current decl. 
+ + namespace_decls end_ns_; + namespace_decls::size_type end_ns_i_; // Index of the current decl. + + // Element state consisting of the content model and attribute map. + // + struct element_entry + { + element_entry (std::size_t d, content_type c = content_type::mixed) + : depth (d), content (c), attr_unhandled_ (0) {} + + std::size_t depth; + content_type content; + attribute_map_type attr_map_; + mutable attribute_map_type::size_type attr_unhandled_; + }; + + typedef std::vector<element_entry> element_state; + std::vector<element_entry> element_state_; + + // Empty attribute map to return when an element has no attributes. + // + const attribute_map_type empty_attr_map_; + + // Return the element entry corresponding to the current depth, if + // it exists, and NULL otherwise. + // + const element_entry* + get_element () const; + + const element_entry* + get_element_ () const; + + void + pop_element (); + }; + + LIBSTUDXML_EXPORT + std::ostream& + operator<< (std::ostream&, parser::event_type); +} + +#include <xml/parser.ixx> +#include <xml/parser.txx> + +#include <xml/details/post.hxx> + +#endif // XML_PARSER |