aboutsummaryrefslogtreecommitdiff
path: root/libstudxml/parser.hxx
diff options
context:
space:
mode:
authorKaren Arutyunov <karen@codesynthesis.com>2017-05-02 21:26:58 +0300
committerKaren Arutyunov <karen@codesynthesis.com>2017-05-02 23:55:21 +0300
commit424e315dfa9a78aebf0653c95f83fe6ed452dd8e (patch)
tree59759d1d4eac4096df104d4dbab24a531ada3399 /libstudxml/parser.hxx
parent3d2b5b2a7064abe35614ebb32db03bd2881adcf0 (diff)
Add hxx extension for headers and libstud prefix for library dir
Diffstat (limited to 'libstudxml/parser.hxx')
-rw-r--r--libstudxml/parser.hxx472
1 files changed, 472 insertions, 0 deletions
diff --git a/libstudxml/parser.hxx b/libstudxml/parser.hxx
new file mode 100644
index 0000000..57a8cdb
--- /dev/null
+++ b/libstudxml/parser.hxx
@@ -0,0 +1,472 @@
+// file : libstudxml/parser.hxx -*- C++ -*-
+// copyright : Copyright (c) 2013-2017 Code Synthesis Tools CC
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBSTUDXML_PARSER_HXX
+#define LIBSTUDXML_PARSER_HXX
+
+#include <libstudxml/details/pre.hxx>
+
+#include <map>
+#include <vector>
+#include <string>
+#include <iosfwd>
+#include <cstddef> // std::size_t
+
+#include <libstudxml/details/config.hxx> // STUDXML_NOTHROW_NOEXCEPT,
+ // LIBSTUDXML_EXTERNAL_EXPAT
+
+#ifndef LIBSTUDXML_EXTERNAL_EXPAT
+# include <libstudxml/details/expat/expat.h>
+#else
+# include <expat.h>
+#endif
+
+// We only support UTF-8 Expat.
+//
+#ifdef XML_UNICODE
+# error UTF-16 expat (XML_UNICODE defined) is not supported
+#endif
+
+#include <libstudxml/forward.hxx>
+#include <libstudxml/qname.hxx>
+#include <libstudxml/content.hxx>
+#include <libstudxml/exception.hxx>
+
+#include <libstudxml/details/export.hxx>
+
+namespace xml
+{
+ class parsing: public exception // Parsing error: carries a name, a line/column position, and a description.
+ {
+ public:
+ virtual
+ ~parsing () STUDXML_NOTHROW_NOEXCEPT {}
+
+ parsing (const std::string& name, // NOTE(review): presumably the input (document) name -- confirm in parser.ixx/.cxx.
+ unsigned long long line,
+ unsigned long long column,
+ const std::string& description);
+
+ parsing (const parser& p, const std::string& description); // NOTE(review): presumably takes name/line/column from p -- confirm.
+
+ const std::string&
+ name () const {return name_;}
+
+ unsigned long long
+ line () const {return line_;}
+
+ unsigned long long
+ column () const {return column_;}
+
+ const std::string&
+ description () const {return description_;}
+
+ virtual const char*
+ what () const STUDXML_NOTHROW_NOEXCEPT {return what_.c_str ();} // Pointer into what_; valid while what_ is unchanged.
+
+ private:
+ LIBSTUDXML_EXPORT void
+ init (); // Defined out of line. NOTE(review): presumably composes what_ from the other members -- confirm.
+
+ private:
+ std::string name_;
+ unsigned long long line_;
+ unsigned long long column_;
+ std::string description_;
+ std::string what_; // Backing storage for what ().
+ };
+
+ class LIBSTUDXML_EXPORT parser
+ {
+ public:
+ typedef xml::qname qname_type;
+ typedef xml::content content_type;
+
+ typedef unsigned short feature_type;
+
+ // If both receive_attributes_event and receive_attributes_map are
+ // specified, then receive_attributes_event is assumed.
+ //
+ static const feature_type receive_elements = 0x0001;
+ static const feature_type receive_characters = 0x0002;
+ static const feature_type receive_attributes_map = 0x0004;
+ static const feature_type receive_attributes_event = 0x0008;
+ static const feature_type receive_namespace_decls = 0x0010;
+
+ static const feature_type receive_default = receive_elements |
+ receive_characters |
+ receive_attributes_map;
+
+ // Parse std::istream. Input name is used in diagnostics to identify
+ // the document being parsed.
+ //
+ // If stream exceptions are enabled then std::ios_base::failure
+ // exception is used to report io errors (badbit and failbit).
+ // Otherwise, those are reported as the parsing exception.
+ //
+ parser (std::istream&,
+ const std::string& input_name,
+ feature_type = receive_default);
+
+ // Parse memory buffer that contains the whole document. Input name
+ // is used in diagnostics to identify the document being parsed.
+ //
+ parser (const void* data,
+ std::size_t size,
+ const std::string& input_name,
+ feature_type = receive_default);
+
+ const std::string&
+ input_name () const {return iname_;}
+
+ ~parser ();
+
+ private:
+ parser (const parser&); // Private: client code cannot copy-construct a parser.
+ parser& operator= (const parser&); // Private: client code cannot copy-assign a parser.
+
+ // Parsing events.
+ //
+ public:
+ enum event_type
+ {
+ // If adding new events, also update the stream insertion operator.
+ //
+ start_element,
+ end_element,
+ start_attribute,
+ end_attribute,
+ characters,
+ start_namespace_decl,
+ end_namespace_decl,
+ eof
+ };
+
+ event_type
+ next ();
+
+ // Get the next event and make sure that it's what's expected. If it
+ // is not, then throw an appropriate parsing exception.
+ //
+ void
+ next_expect (event_type);
+
+ void
+ next_expect (event_type, const std::string& name);
+
+ void
+ next_expect (event_type, const qname_type& qname);
+
+ void
+ next_expect (event_type, const std::string& ns, const std::string& name);
+
+ event_type
+ peek ();
+
+ // Return the event that was last returned by the call to next() or
+ // peek().
+ //
+ event_type
+ event () {return event_;} // NOTE(review): only reads event_; could be declared const.
+
+ // Event data.
+ //
+ public:
+ const qname_type& qname () const {return *pqname_;}
+
+ const std::string& namespace_ () const {return pqname_->namespace_ ();}
+ const std::string& name () const {return pqname_->name ();}
+ const std::string& prefix () const {return pqname_->prefix ();}
+
+ std::string& value () {return *pvalue_;}
+ const std::string& value () const {return *pvalue_;}
+ template <typename T> T value () const;
+
+ unsigned long long line () const {return line_;}
+ unsigned long long column () const {return column_;}
+
+ // Attribute map lookup. If attribute is not found, then the version
+ // without the default value throws an appropriate parsing exception
+ // while the version with the default value returns that value.
+ //
+ // Note also that there is no attribute(ns,name) version since it
+ // would conflict with attribute(name,dv) (qualified attributes
+ // are not very common).
+ //
+ // Attribute map is valid throughout the "element level", that is, until
+ // end_element and not just during start_element. As a special case,
+ // the map is still valid after peek() that returned end_element until
+ // this end_element event is retrieved with next().
+ //
+ const std::string&
+ attribute (const std::string& name) const;
+
+ template <typename T>
+ T
+ attribute (const std::string& name) const;
+
+ std::string
+ attribute (const std::string& name,
+ const std::string& default_value) const;
+
+ template <typename T>
+ T
+ attribute (const std::string& name, const T& default_value) const;
+
+ const std::string&
+ attribute (const qname_type& qname) const;
+
+ template <typename T>
+ T
+ attribute (const qname_type& qname) const;
+
+ std::string
+ attribute (const qname_type& qname,
+ const std::string& default_value) const;
+
+ template <typename T>
+ T
+ attribute (const qname_type& qname, const T& default_value) const;
+
+ bool
+ attribute_present (const std::string& name) const;
+
+ bool
+ attribute_present (const qname_type& qname) const;
+
+ // Low-level attribute map access. Note that this API assumes
+ // all attributes are handled.
+ //
+ struct attribute_value_type
+ {
+ std::string value;
+ mutable bool handled; // mutable: can be updated through the const map reference.
+ };
+
+ typedef std::map<qname_type, attribute_value_type> attribute_map_type;
+
+ const attribute_map_type&
+ attribute_map () const;
+
+ // Optional content processing.
+ //
+ public:
+ // Note that you cannot get/set content while peeking.
+ //
+ void
+ content (content_type);
+
+ content_type
+ content () const;
+
+ // Versions that also set the content. Event type must be start_element.
+ //
+ void
+ next_expect (event_type, const std::string& name, content_type);
+
+ void
+ next_expect (event_type, const qname_type& qname, content_type);
+
+ void
+ next_expect (event_type,
+ const std::string& ns, const std::string& name,
+ content_type);
+
+ // Helpers for parsing elements with simple content. The first two
+ // functions assume that start_element has already been parsed. The
+ // rest parse the complete element, from start to end.
+ //
+ // Note also that as with attribute(), there is no (namespace,name)
+ // overload since it would conflict with (name,default_value).
+ //
+ public:
+ std::string
+ element ();
+
+ template <typename T>
+ T
+ element ();
+
+ std::string
+ element (const std::string& name);
+
+ std::string
+ element (const qname_type& qname);
+
+ template <typename T>
+ T
+ element (const std::string& name);
+
+ template <typename T>
+ T
+ element (const qname_type& qname);
+
+ std::string
+ element (const std::string& name, const std::string& default_value);
+
+ std::string
+ element (const qname_type& qname, const std::string& default_value);
+
+ template <typename T>
+ T
+ element (const std::string& name, const T& default_value);
+
+ template <typename T>
+ T
+ element (const qname_type& qname, const T& default_value);
+
+ // C++11 range-based for support. Generally, the iterator interface
+ // doesn't make much sense for the parser so for now we have an
+ // implementation that is just enough for the range-based for.
+ //
+ public:
+ struct iterator
+ {
+ typedef event_type value_type;
+
+ iterator (parser* p = 0, event_type e = eof): p_ (p), e_ (e) {}
+ value_type operator* () const {return e_;}
+ iterator& operator++ () {e_ = p_->next (); return *this;} // Dereferences p_, so p_ must be non-null here.
+
+ // Comparison only makes sense when comparing to end (eof).
+ //
+ bool operator== (iterator y) const {return e_ == eof && y.e_ == eof;}
+ bool operator!= (iterator y) const {return !(*this == y);}
+
+ private:
+ parser* p_;
+ event_type e_;
+ };
+
+ iterator begin () {return iterator (this, next ());} // Calls next () to obtain the first event.
+ iterator end () {return iterator (this, eof);}
+
+ private:
+ static void XMLCALL
+ start_element_ (void*, const XML_Char*, const XML_Char**);
+
+ static void XMLCALL
+ end_element_ (void*, const XML_Char*);
+
+ static void XMLCALL
+ characters_ (void*, const XML_Char*, int);
+
+ static void XMLCALL
+ start_namespace_decl_ (void*, const XML_Char*, const XML_Char*);
+
+ static void XMLCALL
+ end_namespace_decl_ (void*, const XML_Char*);
+
+ private:
+ void
+ init ();
+
+ event_type
+ next_ (bool peek);
+
+ event_type
+ next_body ();
+
+ void
+ handle_error ();
+
+ private:
+ // If size_ is 0, then data is std::istream. Otherwise, it is a buffer.
+ //
+ union
+ {
+ std::istream* is;
+ const void* buf;
+ } data_;
+
+ std::size_t size_;
+
+ const std::string iname_;
+ feature_type feature_;
+
+ XML_Parser p_;
+ std::size_t depth_;
+ bool accumulate_; // Whether we are accumulating character content.
+ enum {state_next, state_peek} state_;
+ event_type event_;
+ event_type queue_;
+
+ qname_type qname_;
+ std::string value_;
+
+ // These are used to avoid copying when we are handling attributes
+ // and namespace decls.
+ //
+ const qname_type* pqname_;
+ std::string* pvalue_;
+
+ unsigned long long line_;
+ unsigned long long column_;
+
+ // Attributes as events.
+ //
+ struct attribute_type
+ {
+ qname_type qname;
+ std::string value;
+ };
+
+ typedef std::vector<attribute_type> attributes;
+
+ attributes attr_;
+ attributes::size_type attr_i_; // Index of the current attribute.
+
+ // Namespace declarations.
+ //
+ typedef std::vector<qname_type> namespace_decls;
+
+ namespace_decls start_ns_;
+ namespace_decls::size_type start_ns_i_; // Index of the current decl.
+
+ namespace_decls end_ns_;
+ namespace_decls::size_type end_ns_i_; // Index of the current decl.
+
+ // Element state consisting of the content model and attribute map.
+ //
+ struct element_entry
+ {
+ element_entry (std::size_t d, content_type c = content_type::mixed)
+ : depth (d), content (c), attr_unhandled_ (0) {}
+
+ std::size_t depth;
+ content_type content;
+ attribute_map_type attr_map_;
+ mutable attribute_map_type::size_type attr_unhandled_;
+ };
+
+ typedef std::vector<element_entry> element_state;
+ std::vector<element_entry> element_state_; // NOTE(review): same type as the element_state typedef above.
+
+ // Empty attribute map to return when an element has no attributes.
+ //
+ const attribute_map_type empty_attr_map_;
+
+ // Return the element entry corresponding to the current depth, if it
+ // exists, and NULL otherwise.
+ //
+ const element_entry*
+ get_element () const;
+
+ const element_entry*
+ get_element_ () const;
+
+ void
+ pop_element ();
+ };
+
+ LIBSTUDXML_EXPORT std::ostream&
+ operator<< (std::ostream&, parser::event_type); // Stream insertion for parser events; keep in sync with parser::event_type.
+}
+
+#include <libstudxml/parser.ixx>
+#include <libstudxml/parser.txx>
+
+#include <libstudxml/details/post.hxx>
+
+#endif // LIBSTUDXML_PARSER_HXX