From 8e761289a2446367267c6c0d9a26e734f0f78306 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Wed, 16 Dec 2020 20:29:05 +0300 Subject: Get rid of legacy build systems and rename cutl/ to libcutl/ --- cutl/xml/parser.cxx | 826 ---------------------------------------------------- 1 file changed, 826 deletions(-) delete mode 100644 cutl/xml/parser.cxx (limited to 'cutl/xml/parser.cxx') diff --git a/cutl/xml/parser.cxx b/cutl/xml/parser.cxx deleted file mode 100644 index 11e3e97..0000000 --- a/cutl/xml/parser.cxx +++ /dev/null @@ -1,826 +0,0 @@ -// file : cutl/xml/parser.cxx -// license : MIT; see accompanying LICENSE file - -#include // std::bad_alloc -#include -#include // std::strchr -#include -#include -#include - -#include - -using namespace std; - -namespace cutl -{ - namespace xml - { - // parsing - // - parsing:: - ~parsing () throw () {} - - parsing:: - parsing (const string& n, - unsigned long long l, - unsigned long long c, - const string& d) - : name_ (n), line_ (l), column_ (c), description_ (d) - { - init (); - } - - parsing:: - parsing (const parser& p, const std::string& d) - : name_ (p.input_name ()), - line_ (p.line ()), - column_ (p.column ()), - description_ (d) - { - init (); - } - - void parsing:: - init () - { - ostringstream os; - if (!name_.empty ()) - os << name_ << ':'; - os << line_ << ':' << column_ << ": error: " << description_; - what_ = os.str (); - } - - char const* parsing:: - what () const throw () - { - return what_.c_str (); - } - - // parser::event_type - // - static const char* parser_event_str[] = - { - "start element", - "end element", - "start attribute", - "end attribute", - "characters", - "start namespace declaration", - "end namespace declaration", - "end of file" - }; - - ostream& - operator<< (ostream& os, parser::event_type e) - { - return os << parser_event_str[e]; - } - - // parser - // - parser:: - ~parser () - { - if (p_ != 0) - XML_ParserFree (p_); - } - - parser:: - parser (istream& is, const string& iname, feature_type f) - : is_ (is), iname_ (iname), feature_ (f), - depth_ (0), state_ (state_next), event_ (eof), queue_ (eof), - pqname_ (&qname_), pvalue_ (&value_), - attr_i_ (0), start_ns_i_ (0), end_ns_i_ (0) - { - if ((feature_ & receive_attributes_map) != 0 && - (feature_ & receive_attributes_event) != 0) - feature_ &= ~receive_attributes_map; - - // Allocate the parser. Make sure nothing else can throw after - // this call since otherwise we will leak it. - // - p_ = XML_ParserCreateNS (0, XML_Char (' ')); - - if (p_ == 0) - throw bad_alloc (); - - // Get prefixes in addition to namespaces and local names. - // - XML_SetReturnNSTriplet (p_, true); - - // Set handlers. - // - XML_SetUserData(p_, this); - - if ((f & receive_elements) != 0) - { - XML_SetStartElementHandler (p_, &start_element_); - XML_SetEndElementHandler (p_, &end_element_); - } - - if ((f & receive_characters) != 0) - XML_SetCharacterDataHandler (p_, &characters_); - - if ((f & receive_namespace_decls) != 0) - XML_SetNamespaceDeclHandler (p_, - &start_namespace_decl_, - &end_namespace_decl_); - } - - void parser:: - handle_error () - { - XML_Error e (XML_GetErrorCode (p_)); - - if (e == XML_ERROR_ABORTED) - { - // For now we only abort the parser in the characters_() handler. - // - switch (content ()) - { - case empty: - throw parsing (*this, "character in empty content"); - case complex: - throw parsing (*this, "character in complex content"); - default: - assert (false); - } - } - else - throw parsing (iname_, - XML_GetCurrentLineNumber (p_), - XML_GetCurrentColumnNumber (p_), - XML_ErrorString (e)); - } - - struct stream_exception_controller - { - ~stream_exception_controller () - { - istream::iostate s = is_.rdstate (); - s &= ~istream::failbit; - - // If our error state (sans failbit) intersects with the - // exception state then that means we have an active - // exception and changing error/exception state will - // cause another to be thrown. - // - if (!(old_state_ & s)) - { - // Clear failbit if it was caused by eof. - // - if (is_.fail () && is_.eof ()) - is_.clear (s); - - is_.exceptions (old_state_); - } - } - - stream_exception_controller (istream& is) - : is_ (is), old_state_ (is_.exceptions ()) - { - is_.exceptions (old_state_ & ~istream::failbit); - } - - private: - stream_exception_controller (const stream_exception_controller&); - - stream_exception_controller& - operator= (const stream_exception_controller&); - - private: - istream& is_; - istream::iostate old_state_; - }; - - const string& parser:: - attribute (const qname_type& qn) const - { - if (const element_entry* e = get_element ()) - { - attribute_map_type::const_iterator i (e->attr_map_.find (qn)); - - if (i != e->attr_map_.end ()) - { - if (!i->second.handled) - { - i->second.handled = true; - e->attr_unhandled_--; - } - return i->second.value; - } - } - - throw parsing (*this, "attribute '" + qn.string () + "' expected"); - } - - string parser:: - attribute (const qname_type& qn, const string& dv) const - { - if (const element_entry* e = get_element ()) - { - attribute_map_type::const_iterator i (e->attr_map_.find (qn)); - - if (i != e->attr_map_.end ()) - { - if (!i->second.handled) - { - i->second.handled = true; - e->attr_unhandled_--; - } - return i->second.value; - } - } - - return dv; - } - - bool parser:: - attribute_present (const qname_type& qn) const - { - if (const element_entry* e = get_element ()) - { - attribute_map_type::const_iterator i (e->attr_map_.find (qn)); - - if (i != e->attr_map_.end ()) - { - if (!i->second.handled) - { - i->second.handled = true; - e->attr_unhandled_--; - } - return true; - } - } - - return false; - } - - void parser:: - next_expect (event_type e) - { - if (next () != e) - throw parsing (*this, string (parser_event_str[e]) + " expected"); - } - - void parser:: - next_expect (event_type e, const string& ns, const string& n) - { - if (next () != e || namespace_ () != ns || name () != n) - throw parsing (*this, - string (parser_event_str[e]) + " '" + - qname_type (ns, n).string () + "' expected"); - } - - const parser::element_entry* parser:: - get_element () const - { - // The start_element_() Expat handler may have already provisioned - // an entry in the element stack. In this case, we need to get the - // one before it, if any. - // - const element_entry* r (0); - element_state::size_type n (element_state_.size ()); - if (n != 0) - { - n--; - if (element_state_[n].depth == depth_) - r = &element_state_[n]; - else if (n != 0 && element_state_[n].depth > depth_) - { - n--; - if (element_state_[n].depth == depth_) - r = &element_state_[n]; - } - } - return r; - } - - void parser:: - pop_element () - { - // Make sure there are no unhandled attributes left. - // - const element_entry& e (element_state_.back ()); - if (e.attr_unhandled_ != 0) - { - // Find the first unhandled attribute and report it. - // - for (attribute_map_type::const_iterator i (e.attr_map_.begin ()); - i != e.attr_map_.end (); ++i) - { - if (!i->second.handled) - throw parsing ( - *this, "unexpected attribute '" + i->first.string () + "'"); - } - assert (false); - } - - element_state_.pop_back (); - } - - parser::event_type parser:: - next_ (bool peek) - { - event_type e (next_body ()); - - // Content-specific processing. Note that we handle characters in the - // characters_() Expat handler for two reasons. Firstly, it is faster - // to ignore the whitespaces at the source. Secondly, this allows us - // to distinguish between element and attribute characters. We can - // move this processing to the handler because the characters event - // is never queued. - // - switch (e) - { - case end_element: - { - // If this is a peek, then avoid popping the stack just yet. - // This way, the attribute map will still be valid until we - // call next(). - // - if (!peek) - { - if (!element_state_.empty () && - element_state_.back ().depth == depth_) - pop_element (); - - depth_--; - } - break; - } - case start_element: - { - const element_entry* e (get_element ()); - switch (e != 0 ? e->content : mixed) - { - case empty: - throw parsing (*this, "element in empty content"); - case simple: - throw parsing (*this, "element in simple content"); - default: - break; - } - - // If this is a peek, then delay adjusting the depth. - // - if (!peek) - depth_++; - - break; - } - default: - break; - } - - return e; - } - - parser::event_type parser:: - next_body () - { - // See if we have any start namespace declarations we need to return. - // - if (start_ns_i_ < start_ns_.size ()) - { - // Based on the previous event determine what's the next one must be. - // - switch (event_) - { - case start_namespace_decl: - { - if (++start_ns_i_ == start_ns_.size ()) - { - start_ns_i_ = 0; - start_ns_.clear (); - pqname_ = &qname_; - break; // No more declarations. - } - // Fall through. - } - case start_element: - { - event_ = start_namespace_decl; - pqname_ = &start_ns_[start_ns_i_]; - return event_; - } - default: - { - assert (false); - return event_ = eof; - } - } - } - - // See if we have any attributes we need to return as events. - // - if (attr_i_ < attr_.size ()) - { - // Based on the previous event determine what's the next one must be. - // - switch (event_) - { - case start_attribute: - { - event_ = characters; - pvalue_ = &attr_[attr_i_].value; - return event_; - } - case characters: - { - event_ = end_attribute; // Name is already set. - return event_; - } - case end_attribute: - { - if (++attr_i_ == attr_.size ()) - { - attr_i_ = 0; - attr_.clear (); - pqname_ = &qname_; - pvalue_ = &value_; - break; // No more attributes. - } - // Fall through. - } - case start_element: - case start_namespace_decl: - { - event_ = start_attribute; - pqname_ = &attr_[attr_i_].qname; - return event_; - } - default: - { - assert (false); - return event_ = eof; - } - } - } - - // See if we have any end namespace declarations we need to return. - // - if (end_ns_i_ < end_ns_.size ()) - { - // Based on the previous event determine what's the next one must be. - // - switch (event_) - { - case end_namespace_decl: - { - if (++end_ns_i_ == end_ns_.size ()) - { - end_ns_i_ = 0; - end_ns_.clear (); - pqname_ = &qname_; - break; // No more declarations. - } - // Fall through. - } - // The end namespace declaration comes before the end element - // which means it can follow pretty much any other event. - // - default: - { - event_ = end_namespace_decl; - pqname_ = &end_ns_[end_ns_i_]; - return event_; - } - } - } - - // Check the queue. - // - if (queue_ != eof) - { - event_ = queue_; - queue_ = eof; - return event_; - } - - XML_ParsingStatus ps; - XML_GetParsingStatus (p_, &ps); - - switch (ps.parsing) - { - case XML_INITIALIZED: - { - // As if we finished the previous chunk. - break; - } - case XML_PARSING: - { - assert (false); - return event_ = eof; - } - case XML_FINISHED: - { - return event_ = eof; - } - case XML_SUSPENDED: - { - switch (XML_ResumeParser (p_)) - { - case XML_STATUS_SUSPENDED: - { - // If the parser is again in the suspended state, then - // that means we have the next event. - // - return event_; - } - case XML_STATUS_OK: - { - // Otherwise, we need to get and parse the next chunk of data - // unless this was the last chunk, in which case this is eof. - // - if (ps.finalBuffer) - return event_ = eof; - - break; - } - case XML_STATUS_ERROR: - handle_error (); - } - - break; - } - } - - // Get and parse the next chunk of data until we get the next event - // or reach eof. - // - event_ = eof; - XML_Status s; - do - { - const size_t cap (4096); - - char* b (static_cast (XML_GetBuffer (p_, cap))); - if (b == 0) - throw bad_alloc (); - - // Temporarily unset the exception failbit. Also clear the fail bit - // when we reset the old state if it was caused by eof. - // - { - stream_exception_controller sec (is_); - is_.read (b, static_cast (cap)); - } - - s = XML_ParseBuffer (p_, static_cast (is_.gcount ()), is_.eof ()); - - if (s == XML_STATUS_ERROR) - handle_error (); - - } while (s != XML_STATUS_SUSPENDED && !is_.eof ()); - - return event_; - } - - static void - split_name (const XML_Char* s, qname& qn) - { - string& ns (qn.namespace_ ()); - string& name (qn.name ()); - string& prefix (qn.prefix ()); - - const char* p (strchr (s, ' ')); - - if (p == 0) - { - ns.clear (); - name = s; - prefix.clear (); - } - else - { - ns.assign (s, 0, p - s); - - s = p + 1; - p = strchr (s, ' '); - - if (p == 0) - { - name = s; - prefix.clear (); - } - else - { - name.assign (s, 0, p - s); - prefix = p + 1; - } - } - } - - void XMLCALL parser:: - start_element_ (void* v, const XML_Char* name, const XML_Char** atts) - { - parser& p (*static_cast (v)); - - XML_ParsingStatus ps; - XML_GetParsingStatus (p.p_, &ps); - - // Expat has a (mis)-feature of a possibily calling handlers even - // after the non-resumable XML_StopParser call. - // - if (ps.parsing == XML_FINISHED) - return; - - // Cannot be a followup event. - // - assert (ps.parsing == XML_PARSING); - - p.event_ = start_element; - split_name (name, p.qname_); - - p.line_ = XML_GetCurrentLineNumber (p.p_); - p.column_ = XML_GetCurrentColumnNumber (p.p_); - - // Handle attributes. - // - if (*atts != 0) - { - bool am ((p.feature_ & receive_attributes_map) != 0); - bool ae ((p.feature_ & receive_attributes_event) != 0); - - // Provision an entry for this element. - // - element_entry* pe (0); - if (am) - { - p.element_state_.push_back (element_entry (p.depth_ + 1)); - pe = &p.element_state_.back (); - } - - if (am || ae) - { - for (; *atts != 0; atts += 2) - { - if (am) - { - qname_type qn; - split_name (*atts, qn); - attribute_map_type::value_type v (qn, attribute_value_type ()); - v.second.value = *(atts + 1); - v.second.handled = false; - pe->attr_map_.insert (v); - } - else - { - p.attr_.push_back (attribute_type ()); - split_name (*atts, p.attr_.back ().qname); - p.attr_.back ().value = *(atts + 1); - } - } - - if (am) - pe->attr_unhandled_ = pe->attr_map_.size (); - } - } - - XML_StopParser (p.p_, true); - } - - void XMLCALL parser:: - end_element_ (void* v, const XML_Char* name) - { - parser& p (*static_cast (v)); - - XML_ParsingStatus ps; - XML_GetParsingStatus (p.p_, &ps); - - // Expat has a (mis)-feature of a possibily calling handlers even - // after the non-resumable XML_StopParser call. - // - if (ps.parsing == XML_FINISHED) - return; - - // This can be a followup event for empty elements (). In this - // case the element name is already set. - // - if (ps.parsing != XML_PARSING) - p.queue_ = end_element; - else - { - // We may also have the end namespace declaration events which - // should come before the end element. If that's the case, then - // queue the end element and return the end namespace as the next - // event. - // - if (p.end_ns_i_ < p.end_ns_.size ()) - { - p.event_ = end_namespace_decl; - p.queue_ = end_element; - } - else - p.event_ = end_element; - - split_name (name, p.qname_); - - p.line_ = XML_GetCurrentLineNumber (p.p_); - p.column_ = XML_GetCurrentColumnNumber (p.p_); - - XML_StopParser (p.p_, true); - } - } - - void XMLCALL parser:: - characters_ (void* v, const XML_Char* s, int n) - { - parser& p (*static_cast (v)); - - XML_ParsingStatus ps; - XML_GetParsingStatus (p.p_, &ps); - - // Expat has a (mis)-feature of a possibily calling handlers even - // after the non-resumable XML_StopParser call. - // - if (ps.parsing == XML_FINISHED) - return; - - // If this is empty or complex content, see if these are whitespaces. - // - switch (p.content ()) - { - case empty: - case complex: - { - for (int i (0); i != n; ++i) - { - char c (s[i]); - if (c == 0x20 || c == 0x0A || c == 0x0D || c == 0x09) - continue; - - // It would have been easier to throw the exception directly, - // however, the Expat code is most likely not exception safe. - // - p.line_ = XML_GetCurrentLineNumber (p.p_); - p.column_ = XML_GetCurrentColumnNumber (p.p_); - XML_StopParser (p.p_, false); - break; - } - return; - } - default: - break; - } - - // This can be a followup event for another character event. In - // this case simply append the data. - // - if (ps.parsing != XML_PARSING) - { - assert (p.event_ == characters); - p.value_.append (s, n); - } - else - { - p.event_ = characters; - p.value_.assign (s, n); - - p.line_ = XML_GetCurrentLineNumber (p.p_); - p.column_ = XML_GetCurrentColumnNumber (p.p_); - - XML_StopParser (p.p_, true); - } - } - - void XMLCALL parser:: - start_namespace_decl_ (void* v, const XML_Char* prefix, const XML_Char* ns) - { - parser& p (*static_cast (v)); - - XML_ParsingStatus ps; - XML_GetParsingStatus (p.p_, &ps); - - // Expat has a (mis)-feature of a possibily calling handlers even - // after the non-resumable XML_StopParser call. - // - if (ps.parsing == XML_FINISHED) - return; - - p.start_ns_.push_back (qname_type ()); - p.start_ns_.back ().prefix () = (prefix != 0 ? prefix : ""); - p.start_ns_.back ().namespace_ () = (ns != 0 ? ns : ""); - } - - void XMLCALL parser:: - end_namespace_decl_ (void* v, const XML_Char* prefix) - { - parser& p (*static_cast (v)); - - XML_ParsingStatus ps; - XML_GetParsingStatus (p.p_, &ps); - - // Expat has a (mis)-feature of a possibily calling handlers even - // after the non-resumable XML_StopParser call. - // - if (ps.parsing == XML_FINISHED) - return; - - p.end_ns_.push_back (qname_type ()); - p.end_ns_.back ().prefix () = (prefix != 0 ? prefix : ""); - } - } -} -- cgit v1.1