From 707387b10340c7b95db35c8b791e57a2d02ccbaf Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Fri, 15 Mar 2013 20:13:04 +0200 Subject: Make attribute map valid throughout at "element level" --- cutl/xml/parser.cxx | 202 ++++++++++++++++++++++++++++---------------- cutl/xml/parser.hxx | 100 ++++++++++++++++------ cutl/xml/parser.txx | 21 +++-- tests/xml/parser/driver.cxx | 21 +++++ 4 files changed, 236 insertions(+), 108 deletions(-) diff --git a/cutl/xml/parser.cxx b/cutl/xml/parser.cxx index ee62a24..f57d9fd 100644 --- a/cutl/xml/parser.cxx +++ b/cutl/xml/parser.cxx @@ -92,7 +92,7 @@ namespace cutl : is_ (is), iname_ (iname), feature_ (f), depth_ (0), state_ (state_next), event_ (eof), queue_ (eof), pqname_ (&qname_), pvalue_ (&value_), - attr_unhandled_ (0), attr_i_ (0), start_ns_i_ (0), end_ns_i_ (0) + attr_i_ (0), start_ns_i_ (0), end_ns_i_ (0) { if ((feature_ & receive_attributes_map) != 0 && (feature_ & receive_attributes_event) != 0) @@ -198,37 +198,43 @@ namespace cutl const string& parser:: attribute (const qname_type& qn) const { - attribute_map::const_iterator i (attr_map_.find (qn)); - - if (i != attr_map_.end ()) + if (const element_entry* e = get_element ()) { - if (!i->second.handled) + attribute_map::const_iterator i (e->attr_map_.find (qn)); + + if (i != e->attr_map_.end ()) { - i->second.handled = true; - attr_unhandled_--; + if (!i->second.handled) + { + i->second.handled = true; + e->attr_unhandled_--; + } + return i->second.value; } - return i->second.value; } - else - throw parsing (*this, "attribute '" + qn.string () + "' expected"); + + throw parsing (*this, "attribute '" + qn.string () + "' expected"); } string parser:: attribute (const qname_type& qn, const string& dv) const { - attribute_map::const_iterator i (attr_map_.find (qn)); - - if (i != attr_map_.end ()) + if (const element_entry* e = get_element ()) { - if (!i->second.handled) + attribute_map::const_iterator i (e->attr_map_.find (qn)); + + if (i != e->attr_map_.end ()) { - i->second.handled = true; - attr_unhandled_--; + if (!i->second.handled) + { + i->second.handled = true; + e->attr_unhandled_--; + } + return i->second.value; } - return i->second.value; } - else - return dv; + + return dv; } void parser:: @@ -247,8 +253,55 @@ namespace cutl qname_type (ns, n).string () + "' expected"); } + const parser::element_entry* parser:: + get_element () const + { + // The start_element_() Expat handler may have already provisioned + // an entry in the element stack. In this case, we need to get the + // one before it, if any. + // + const element_entry* r (0); + element_state::size_type n (element_state_.size ()); + if (n != 0) + { + n--; + if (element_state_[n].depth == depth_) + r = &element_state_[n]; + else if (n != 0 && element_state_[n].depth > depth_) + { + n--; + if (element_state_[n].depth == depth_) + r = &element_state_[n]; + } + } + return r; + } + + void parser:: + pop_element () + { + // Make sure there are no unhandled attributes left. + // + const element_entry& e (element_state_.back ()); + if (e.attr_unhandled_ != 0) + { + // Find the first unhandled attribute and report it. + // + for (attribute_map::const_iterator i (e.attr_map_.begin ()); + i != e.attr_map_.end (); ++i) + { + if (!i->second.handled) + throw parsing ( + *this, "unexpected attribute '" + i->first.string () + "'"); + } + assert (false); + } + + element_state_.pop_back (); + } + parser::event_type parser:: - next_ () + next_ (bool peek) { event_type e (next_body ()); @@ -261,9 +314,26 @@ namespace cutl // switch (e) { + case end_element: + { + // If this is a peek, then avoid popping the stack just yet. + // This way, the attribute map will still be valid until we + // call next(). + // + if (!peek) + { + if (!element_state_.empty () && + element_state_.back ().depth == depth_) + pop_element (); + + depth_--; + } + break; + } case start_element: { - switch (content ()) + const element_entry* e (get_element ()); + switch (e != 0 ? e->content : mixed) { case empty: throw parsing (*this, "element in empty content"); @@ -273,15 +343,11 @@ namespace cutl break; } - depth_++; - break; - } - case end_element: - { - if (!content_.empty () && content_.back ().depth == depth_) - content_.pop_back (); + // If this is a peek, then delay adjusting the depth. + // + if (!peek) + depth_++; - depth_--; break; } default: @@ -294,29 +360,6 @@ namespace cutl parser::event_type parser:: next_body () { - // If the previous event is start_element and we return attributes - // as a map, make sure there are no unhandled attributes left. Also - // clear the map. - // - if (event_ == start_element && (feature_ & receive_attributes_map) != 0) - { - if (attr_unhandled_ != 0) - { - // Find the first unhandled attribute and report it. - // - for (attribute_map::const_iterator i (attr_map_.begin ()); - i != attr_map_.end (); ++i) - { - if (!i->second.handled) - throw parsing ( - *this, "unexpected attribute '" + i->first.string () + "'"); - } - assert (false); - } - - attr_map_.clear (); - } - // See if we have any start namespace declarations we need to return. // if (start_ns_i_ < start_ns_.size ()) @@ -576,31 +619,44 @@ namespace cutl // Handle attributes. // - bool am ((p.feature_ & receive_attributes_map) != 0); - bool ae ((p.feature_ & receive_attributes_event) != 0); - if (am || ae) + if (*atts != 0) { - for (; *atts != 0; atts += 2) + bool am ((p.feature_ & receive_attributes_map) != 0); + bool ae ((p.feature_ & receive_attributes_event) != 0); + + // Provision an entry for this element. + // + element_entry* pe (0); + if (am) { - if (am) - { - qname_type qn; - split_name (*atts, qn); - attribute_map::value_type v (qn, attribute_value ()); - v.second.value = *(atts + 1); - v.second.handled = false; - p.attr_map_.insert (v); - } - else + p.element_state_.push_back (element_entry (p.depth_ + 1)); + pe = &p.element_state_.back (); + } + + if (am || ae) + { + for (; *atts != 0; atts += 2) { - p.attr_.push_back (attribute_type ()); - split_name (*atts, p.attr_.back ().qname); - p.attr_.back ().value = *(atts + 1); + if (am) + { + qname_type qn; + split_name (*atts, qn); + attribute_map::value_type v (qn, attribute_value ()); + v.second.value = *(atts + 1); + v.second.handled = false; + pe->attr_map_.insert (v); + } + else + { + p.attr_.push_back (attribute_type ()); + split_name (*atts, p.attr_.back ().qname); + p.attr_.back ().value = *(atts + 1); + } } - } - if (am) - p.attr_unhandled_ = p.attr_map_.size (); + if (am) + pe->attr_unhandled_ = pe->attr_map_.size (); + } } XML_StopParser (p.p_, true); @@ -663,7 +719,7 @@ namespace cutl if (ps.parsing == XML_FINISHED) return; - // If this is empty of complex content, see if these are whitespaces. + // If this is empty or complex content, see if these are whitespaces. // switch (p.content ()) { diff --git a/cutl/xml/parser.hxx b/cutl/xml/parser.hxx index c84268a..8aed55f 100644 --- a/cutl/xml/parser.hxx +++ b/cutl/xml/parser.hxx @@ -10,6 +10,7 @@ #include #include #include // std::size_t +#include #include // LIBCUTL_EXTERNAL_EXPAT @@ -128,9 +129,32 @@ namespace cutl next () { if (state_ == state_next) - return next_ (); + return next_ (false); else { + // If we previously peeked at start/end_element, then adjust + // state accordingly. + // + switch (event_) + { + case end_element: + { + if (!element_state_.empty () && + element_state_.back ().depth == depth_) + pop_element (); + + depth_--; + break; + } + case start_element: + { + depth_++; + break; + } + default: + break; + } + state_ = state_next; return event_; } @@ -158,8 +182,9 @@ namespace cutl return event_; else { - state_ = state_peek; - return next_ (); + event_type e (next_ (true)); + state_ = state_peek; // Set it after the call to next_(). + return e; } } @@ -191,6 +216,11 @@ namespace cutl // would conflict with attribute(name,dv) (qualified attributes // are not very common). // + // Attribute map is valid throughout at the "element level" until + // end_element and not just during start_element. As a special case, + // the map is still valid after peek() that returned end_element until + // this end_element event is retrieved with next(). + // const std::string& attribute (const std::string& name) const; @@ -231,20 +261,28 @@ namespace cutl mixed // yes yes preserved }; + // Note that you cannot get/set content while peeking. + // void content (content_type c) { - if (!content_.empty () && content_.back ().depth == depth_) - content_.back ().content = c; + assert (state_ == state_next); + + if (!element_state_.empty () && element_state_.back ().depth == depth_) + element_state_.back ().content = c; else - content_.push_back (content_entry (depth_, c)); + element_state_.push_back (element_entry (depth_, c)); } content_type content () const { - return !content_.empty () && content_.back ().depth == depth_ - ? content_.back ().content : mixed; + assert (state_ == state_next); + + return + !element_state_.empty () && element_state_.back ().depth == depth_ + ? element_state_.back ().content + : mixed; } private: @@ -265,7 +303,7 @@ namespace cutl private: event_type - next_ (); + next_ (bool peek); event_type next_body (); @@ -296,18 +334,6 @@ namespace cutl unsigned long long line_; unsigned long long column_; - // Attributes as a map. - // - struct attribute_value - { - std::string value; - mutable bool handled; - }; - - typedef std::map attribute_map; - attribute_map attr_map_; - mutable attribute_map::size_type attr_unhandled_; - // Attributes as events. // struct attribute_type @@ -331,18 +357,40 @@ namespace cutl namespace_decls end_ns_; namespace_decls::size_type end_ns_i_; // Index of the current decl. - // Content. + // Attributes as a map. + // + struct attribute_value + { + std::string value; + mutable bool handled; + }; + + typedef std::map attribute_map; + + // Element state consisting of the content model and attribute map. // - struct content_entry + struct element_entry { - content_entry (std::size_t d, content_type c) - : depth (d), content (c) {} + element_entry (std::size_t d, content_type c = mixed) + : depth (d), content (c), attr_unhandled_ (0) {} std::size_t depth; content_type content; + attribute_map attr_map_; + mutable attribute_map::size_type attr_unhandled_; }; - std::vector content_; + typedef std::vector element_state; + std::vector element_state_; + + // Return the element entry corresponding to the current depth, if + // exists, and NULL otherwise. + // + const element_entry* + get_element () const; + + void + pop_element (); }; LIBCUTL_EXPORT diff --git a/cutl/xml/parser.txx b/cutl/xml/parser.txx index cf27f2c..e5286f8 100644 --- a/cutl/xml/parser.txx +++ b/cutl/xml/parser.txx @@ -12,19 +12,22 @@ namespace cutl T parser:: attribute (const qname_type& qn, const T& dv) const { - attribute_map::const_iterator i (attr_map_.find (qn)); - - if (i != attr_map_.end ()) + if (const element_entry* e = get_element ()) { - if (!i->second.handled) + attribute_map::const_iterator i (e->attr_map_.find (qn)); + + if (i != e->attr_map_.end ()) { - i->second.handled = true; - attr_unhandled_--; + if (!i->second.handled) + { + i->second.handled = true; + e->attr_unhandled_--; + } + return value_traits::parse (i->second.value, *this); } - return value_traits::parse (i->second.value, *this); } - else - return dv; + + return dv; } } } diff --git a/tests/xml/parser/driver.cxx b/tests/xml/parser/driver.cxx index 4c85b77..39d5994 100644 --- a/tests/xml/parser/driver.cxx +++ b/tests/xml/parser/driver.cxx @@ -98,6 +98,27 @@ main () p.next_expect (parser::end_element); } + { + istringstream is (""); + parser p (is, "test"); + p.next_expect (parser::start_element, "root"); + assert (p.attribute ("a") == "a"); + assert (p.peek () == parser::start_element && p.name () == "nested"); + assert (p.attribute ("a") == "a"); + p.next_expect (parser::start_element, "nested"); + assert (p.attribute ("a") == "A"); + p.next_expect (parser::start_element, "inner"); + assert (p.attribute ("a", "") == ""); + p.next_expect (parser::end_element); + assert (p.attribute ("a") == "A"); + assert (p.peek () == parser::end_element); + assert (p.attribute ("a") == "A"); // Still valid. + p.next_expect (parser::end_element); + assert (p.attribute ("a") == "a"); + p.next_expect (parser::end_element); + assert (p.attribute ("a", "") == ""); + } + try { istringstream is (""); -- cgit v1.1