From e897aa91a2a5c68a2f795f6a0a995600f13a85f8 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Wed, 14 May 2014 21:29:29 -0700 Subject: Convert to extension-less headers for API --- xml/parser | 473 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 473 insertions(+) create mode 100644 xml/parser (limited to 'xml/parser') diff --git a/xml/parser b/xml/parser new file mode 100644 index 0000000..a1b6250 --- /dev/null +++ b/xml/parser @@ -0,0 +1,473 @@ +// file : xml/parser -*- C++ -*- +// copyright : Copyright (c) 2013-2014 Code Synthesis Tools CC +// license : MIT; see accompanying LICENSE file + +#ifndef XML_PARSER +#define XML_PARSER + +#include + +#include +#include +#include +#include +#include // std::size_t + +#include // LIBSTUDXML_EXTERNAL_EXPAT + +#ifndef LIBSTUDXML_EXTERNAL_EXPAT +# include +#else +# include +#endif + +// We only support UTF-8 Expat. +// +#ifdef XML_UNICODE +# error UTF-16 expat (XML_UNICODE defined) is not supported +#endif + +#include +#include +#include +#include + +#include + +namespace xml +{ + class parser; + + struct LIBSTUDXML_EXPORT parsing: exception + { + virtual + ~parsing () throw (); + + parsing (const std::string& name, + unsigned long long line, + unsigned long long column, + const std::string& description); + + parsing (const parser&, const std::string& description); + + const std::string& + name () const {return name_;} + + unsigned long long + line () const {return line_;} + + unsigned long long + column () const {return column_;} + + const std::string& + description () const {return description_;} + + virtual const char* + what () const throw (); + + private: + void + init (); + + private: + std::string name_; + unsigned long long line_; + unsigned long long column_; + std::string description_; + std::string what_; + }; + + class LIBSTUDXML_EXPORT parser + { + public: + typedef xml::qname qname_type; + typedef xml::content content_type; + + typedef unsigned short feature_type; + + // If
both receive_attributes_event and receive_attributes_map are + // specified, then receive_attributes_event is assumed. + // + static const feature_type receive_elements = 0x0001; + static const feature_type receive_characters = 0x0002; + static const feature_type receive_attributes_map = 0x0004; + static const feature_type receive_attributes_event = 0x0008; + static const feature_type receive_namespace_decls = 0x0010; + + static const feature_type receive_default = receive_elements | + receive_characters | + receive_attributes_map; + + // Parse std::istream. Input name is used in diagnostics to identify + // the document being parsed. + // + // If stream exceptions are enabled then std::ios_base::failure + // exception is used to report io errors (badbit and failbit). + // Otherwise, those are reported as the parsing exception. + // + parser (std::istream&, + const std::string& input_name, + feature_type = receive_default); + + // Parse memory buffer that contains the whole document. Input name + // is used in diagnostics to identify the document being parsed. + // + parser (const void* data, + std::size_t size, + const std::string& input_name, + feature_type = receive_default); + + const std::string& + input_name () const {return iname_;} + + ~parser (); + + private: + parser (const parser&); + parser& operator= (const parser&); + + // Parsing events. + // + public: + enum event_type + { + // If adding new events, also update the stream insertion operator. + // + start_element, + end_element, + start_attribute, + end_attribute, + characters, + start_namespace_decl, + end_namespace_decl, + eof + }; + + event_type + next (); + + // Get the next event and make sure that it's what's expected. If it + // is not, then throw an appropriate parsing exception.
+ // + void + next_expect (event_type); + + void + next_expect (event_type, const std::string& name); + + void + next_expect (event_type, const qname_type& qname); + + void + next_expect (event_type, const std::string& ns, const std::string& name); + + event_type + peek (); + + // Return the event that was last returned by the call to next() or + // peek(). + // + event_type + event () {return event_;} + + // Event data. + // + public: + const qname_type& qname () const {return *pqname_;} + + const std::string& namespace_ () const {return pqname_->namespace_ ();} + const std::string& name () const {return pqname_->name ();} + const std::string& prefix () const {return pqname_->prefix ();} + + std::string& value () {return *pvalue_;} + const std::string& value () const {return *pvalue_;} + template T value () const; + + unsigned long long line () const {return line_;} + unsigned long long column () const {return column_;} + + // Attribute map lookup. If attribute is not found, then the version + // without the default value throws an appropriate parsing exception + // while the version with the default value returns that value. + // + // Note also that there is no attribute(ns,name) version since it + // would conflict with attribute(name,dv) (qualified attributes + // are not very common). + // + // Attribute map is valid throughout the "element level" until + // end_element and not just during start_element. As a special case, + // the map is still valid after peek() that returned end_element until + // this end_element event is retrieved with next().
+ // + const std::string& + attribute (const std::string& name) const; + + template + T + attribute (const std::string& name) const; + + std::string + attribute (const std::string& name, + const std::string& default_value) const; + + template + T + attribute (const std::string& name, const T& default_value) const; + + const std::string& + attribute (const qname_type& qname) const; + + template + T + attribute (const qname_type& qname) const; + + std::string + attribute (const qname_type& qname, + const std::string& default_value) const; + + template + T + attribute (const qname_type& qname, const T& default_value) const; + + bool + attribute_present (const std::string& name) const; + + bool + attribute_present (const qname_type& qname) const; + + // Low-level attribute map access. Note that this API assumes + // all attributes are handled. + // + struct attribute_value_type + { + std::string value; + mutable bool handled; + }; + + typedef std::map attribute_map_type; + + const attribute_map_type& + attribute_map () const; + + // Optional content processing. + // + public: + // Note that you cannot get/set content while peeking. + // + void + content (content_type); + + content_type + content () const; + + // Versions that also set the content. Event type must be start_element. + // + void + next_expect (event_type, const std::string& name, content_type); + + void + next_expect (event_type, const qname_type& qname, content_type); + + void + next_expect (event_type, + const std::string& ns, const std::string& name, + content_type); + + // Helpers for parsing elements with simple content. The first two + // functions assume that start_element has already been parsed. The + // rest parse the complete element, from start to end. + // + // Note also that as with attribute(), there is no (namespace,name) + // overload since it would conflict with (namespace,default_value).
+ // + public: + std::string + element (); + + template + T + element (); + + std::string + element (const std::string& name); + + std::string + element (const qname_type& qname); + + template + T + element (const std::string& name); + + template + T + element (const qname_type& qname); + + std::string + element (const std::string& name, const std::string& default_value); + + std::string + element (const qname_type& qname, const std::string& default_value); + + template + T + element (const std::string& name, const T& default_value); + + template + T + element (const qname_type& qname, const T& default_value); + + // C++11 range-based for support. Generally, the iterator interface + // doesn't make much sense for the parser so for now we have an + // implementation that is just enough for the range-based for. + // + public: + struct iterator + { + typedef event_type value_type; + + iterator (parser* p = 0, event_type e = eof): p_ (p), e_ (e) {} + value_type operator* () const {return e_;} + iterator& operator++ () {e_ = p_->next (); return *this;} + + // Comparison only makes sense when comparing to end (eof). + // + bool operator== (iterator y) const {return e_ == eof && y.e_ == eof;} + bool operator!= (iterator y) const {return !(*this == y);} + + private: + parser* p_; + event_type e_; + }; + + iterator begin () {return iterator (this, next ());} + iterator end () {return iterator (this, eof);} + + private: + static void XMLCALL + start_element_ (void*, const XML_Char*, const XML_Char**); + + static void XMLCALL + end_element_ (void*, const XML_Char*); + + static void XMLCALL + characters_ (void*, const XML_Char*, int); + + static void XMLCALL + start_namespace_decl_ (void*, const XML_Char*, const XML_Char*); + + static void XMLCALL + end_namespace_decl_ (void*, const XML_Char*); + + private: + void + init (); + + event_type + next_ (bool peek); + + event_type + next_body (); + + void + handle_error (); + + private: + // If size_ is 0, then data is std::istream.
Otherwise, it is a buffer. + // + union + { + std::istream* is; + const void* buf; + } data_; + + std::size_t size_; + + const std::string iname_; + feature_type feature_; + + XML_Parser p_; + std::size_t depth_; + bool accumulate_; // Whether we are accumulating character content. + enum {state_next, state_peek} state_; + event_type event_; + event_type queue_; + + qname_type qname_; + std::string value_; + + // These are used to avoid copying when we are handling attributes + // and namespace decls. + // + const qname_type* pqname_; + std::string* pvalue_; + + unsigned long long line_; + unsigned long long column_; + + // Attributes as events. + // + struct attribute_type + { + qname_type qname; + std::string value; + }; + + typedef std::vector attributes; + + attributes attr_; + attributes::size_type attr_i_; // Index of the current attribute. + + // Namespace declarations. + // + typedef std::vector namespace_decls; + + namespace_decls start_ns_; + namespace_decls::size_type start_ns_i_; // Index of the current decl. + + namespace_decls end_ns_; + namespace_decls::size_type end_ns_i_; // Index of the current decl. + + // Element state consisting of the content model and attribute map. + // + struct element_entry + { + element_entry (std::size_t d, content_type c = content_type::mixed) + : depth (d), content (c), attr_unhandled_ (0) {} + + std::size_t depth; + content_type content; + attribute_map_type attr_map_; + mutable attribute_map_type::size_type attr_unhandled_; + }; + + typedef std::vector element_state; + std::vector element_state_; + + // Empty attribute map to return when an element has no attributes. + // + const attribute_map_type empty_attr_map_; + + // Return the element entry corresponding to the current depth, if + // one exists, and NULL otherwise.
+ // + const element_entry* + get_element () const; + + const element_entry* + get_element_ () const; + + void + pop_element (); + }; + + LIBSTUDXML_EXPORT + std::ostream& + operator<< (std::ostream&, parser::event_type); +} + +#include +#include + +#include + +#endif // XML_PARSER -- cgit v1.1