From 387fbf589e73d96af9050afa121ef8b5e5370c4e Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Wed, 30 Apr 2014 19:51:27 +0200 Subject: Add support for parsing memory buffer directly --- xml/parser.cxx | 108 ++++++++++++++++++++++++++++++++++++--------------------- xml/parser.hxx | 28 +++++++++++++-- xml/parser.ixx | 29 ++++++++++++++++ 3 files changed, 123 insertions(+), 42 deletions(-) diff --git a/xml/parser.cxx b/xml/parser.cxx index 4079d3f..d9d2fcb 100644 --- a/xml/parser.cxx +++ b/xml/parser.cxx @@ -85,13 +85,21 @@ namespace xml XML_ParserFree (p_); } - parser:: - parser (istream& is, const string& iname, feature_type f) - : is_ (is), iname_ (iname), feature_ (f), - depth_ (0), state_ (state_next), event_ (eof), queue_ (eof), - pqname_ (&qname_), pvalue_ (&value_), - attr_i_ (0), start_ns_i_ (0), end_ns_i_ (0) + void parser:: + init () { + depth_ = 0; + state_ = state_next; + event_ = eof; + queue_ = eof; + + pqname_ = &qname_; + pvalue_ = &value_; + + attr_i_ = 0; + start_ns_i_ = 0; + end_ns_i_ = 0; + if ((feature_ & receive_attributes_map) != 0 && (feature_ & receive_attributes_event) != 0) feature_ &= ~receive_attributes_map; @@ -112,16 +120,16 @@ namespace xml // XML_SetUserData(p_, this); - if ((f & receive_elements) != 0) + if ((feature_ & receive_elements) != 0) { XML_SetStartElementHandler (p_, &start_element_); XML_SetEndElementHandler (p_, &end_element_); } - if ((f & receive_characters) != 0) + if ((feature_ & receive_characters) != 0) XML_SetCharacterDataHandler (p_, &characters_); - if ((f & receive_namespace_decls) != 0) + if ((feature_ & receive_namespace_decls) != 0) XML_SetNamespaceDeclHandler (p_, &start_namespace_decl_, &end_namespace_decl_); @@ -276,26 +284,24 @@ namespace xml } const parser::element_entry* parser:: - get_element () const + get_element_ () const { // The start_element_() Expat handler may have already provisioned // an entry in the element stack. In this case, we need to get the // one before it, if any. // const element_entry* r (0); - element_state::size_type n (element_state_.size ()); - if (n != 0) + element_state::size_type n (element_state_.size () - 1); + + if (element_state_[n].depth == depth_) + r = &element_state_[n]; + else if (n != 0 && element_state_[n].depth > depth_) { n--; if (element_state_[n].depth == depth_) r = &element_state_[n]; - else if (n != 0 && element_state_[n].depth > depth_) - { - n--; - if (element_state_[n].depth == depth_) - r = &element_state_[n]; - } } + return r; } @@ -354,15 +360,17 @@ namespace xml } case start_element: { - const element_entry* e (get_element ()); - switch (e != 0 ? e->content : mixed) + if (const element_entry* e = get_element ()) { - case empty: - throw parsing (*this, "element in empty content"); - case simple: - throw parsing (*this, "element in simple content"); - default: - break; + switch (e->content) + { + case empty: + throw parsing (*this, "element in empty content"); + case simple: + throw parsing (*this, "element in simple content"); + default: + break; + } } // If this is a peek, then delay adjusting the depth. @@ -564,26 +572,46 @@ namespace xml XML_Status s; do { - const size_t cap (4096); + if (size_ != 0) + { + s = XML_Parse (p_, + static_cast (data_.buf), + static_cast (size_), + true); - char* b (static_cast (XML_GetBuffer (p_, cap))); - if (b == 0) - throw bad_alloc (); + if (s == XML_STATUS_ERROR) + handle_error (); - // Temporarily unset the exception failbit. Also clear the fail bit - // when we reset the old state if it was caused by eof. - // - { - stream_exception_controller sec (is_); - is_.read (b, static_cast (cap)); + break; } + else + { + const size_t cap (4096); - s = XML_ParseBuffer (p_, static_cast (is_.gcount ()), is_.eof ()); + char* b (static_cast (XML_GetBuffer (p_, cap))); + if (b == 0) + throw bad_alloc (); - if (s == XML_STATUS_ERROR) - handle_error (); + // Temporarily unset the exception failbit. Also clear the fail bit + // when we reset the old state if it was caused by eof. + // + istream& is (*data_.is); + { + stream_exception_controller sec (is); + is.read (b, static_cast (cap)); + } + + bool eof (is.eof ()); - } while (s != XML_STATUS_SUSPENDED && !is_.eof ()); + s = XML_ParseBuffer (p_, static_cast (is.gcount ()), eof); + + if (s == XML_STATUS_ERROR) + handle_error (); + + if (eof) + break; + } + } while (s != XML_STATUS_SUSPENDED); return event_; } diff --git a/xml/parser.hxx b/xml/parser.hxx index ab31959..1c9d389 100644 --- a/xml/parser.hxx +++ b/xml/parser.hxx @@ -105,6 +105,15 @@ namespace xml const std::string& input_name, feature_type = receive_default); + // Parse memory buffer that contains the whole document. Input name + // is used in diagnostics to identify the document being parsed. + // + parser (const void* data, + std::size_t size, + const std::string& input_name, + feature_type = receive_default); + + const std::string& input_name () const {return iname_;} @@ -276,7 +285,7 @@ namespace xml // Optional content processing. // - public: + public: enum content_type { // element characters whitespaces notes @@ -329,6 +338,9 @@ namespace xml end_namespace_decl_ (void*, const XML_Char*); private: + void + init (); + event_type next_ (bool peek); @@ -339,7 +351,16 @@ namespace xml handle_error (); private: - std::istream& is_; + // If size_ is 0, then data is std::istream. Otherwise, it is a buffer. + // + union + { + std::istream* is; + const void* buf; + } data_; + + std::size_t size_; + const std::string iname_; feature_type feature_; @@ -411,6 +432,9 @@ namespace xml const element_entry* get_element () const; + const element_entry* + get_element_ () const; + void pop_element (); }; diff --git a/xml/parser.ixx b/xml/parser.ixx index ac27b46..9041b0c 100644 --- a/xml/parser.ixx +++ b/xml/parser.ixx @@ -2,10 +2,33 @@ // copyright : Copyright (c) 2013-2014 Code Synthesis Tools CC // license : MIT; see accompanying LICENSE file +#include + #include namespace xml { + inline parser:: + parser (std::istream& is, const std::string& iname, feature_type f) + : size_ (0), iname_ (iname), feature_ (f) + { + data_.is = &is; + init (); + } + + inline parser:: + parser (const void* data, + std::size_t size, + const std::string& iname, + feature_type f) + : size_ (size), iname_ (iname), feature_ (f) + { + assert (data != 0 && size != 0); + + data_.buf = data; + init (); + } + template inline T parser:: value () const @@ -75,4 +98,10 @@ namespace xml { return next_expect (e, std::string (), n); } + + inline const parser::element_entry* parser:: + get_element () const + { + return element_state_.empty () ? 0 : get_element_ (); + } } -- cgit v1.1