about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- cutl/xml/parser.cxx 202
-rw-r--r-- cutl/xml/parser.hxx 100
-rw-r--r-- cutl/xml/parser.txx 21
-rw-r--r-- tests/xml/parser/driver.cxx 21
4 files changed, 236 insertions, 108 deletions
diff --git a/cutl/xml/parser.cxx b/cutl/xml/parser.cxx
index ee62a24..f57d9fd 100644
--- a/cutl/xml/parser.cxx
+++ b/cutl/xml/parser.cxx
@@ -92,7 +92,7 @@ namespace cutl
: is_ (is), iname_ (iname), feature_ (f),
depth_ (0), state_ (state_next), event_ (eof), queue_ (eof),
pqname_ (&qname_), pvalue_ (&value_),
- attr_unhandled_ (0), attr_i_ (0), start_ns_i_ (0), end_ns_i_ (0)
+ attr_i_ (0), start_ns_i_ (0), end_ns_i_ (0)
{
if ((feature_ & receive_attributes_map) != 0 &&
(feature_ & receive_attributes_event) != 0)
@@ -198,37 +198,43 @@ namespace cutl
const string& parser::
attribute (const qname_type& qn) const
{
- attribute_map::const_iterator i (attr_map_.find (qn));
-
- if (i != attr_map_.end ())
+ if (const element_entry* e = get_element ())
{
- if (!i->second.handled)
+ attribute_map::const_iterator i (e->attr_map_.find (qn));
+
+ if (i != e->attr_map_.end ())
{
- i->second.handled = true;
- attr_unhandled_--;
+ if (!i->second.handled)
+ {
+ i->second.handled = true;
+ e->attr_unhandled_--;
+ }
+ return i->second.value;
}
- return i->second.value;
}
- else
- throw parsing (*this, "attribute '" + qn.string () + "' expected");
+
+ throw parsing (*this, "attribute '" + qn.string () + "' expected");
}
string parser::
attribute (const qname_type& qn, const string& dv) const
{
- attribute_map::const_iterator i (attr_map_.find (qn));
-
- if (i != attr_map_.end ())
+ if (const element_entry* e = get_element ())
{
- if (!i->second.handled)
+ attribute_map::const_iterator i (e->attr_map_.find (qn));
+
+ if (i != e->attr_map_.end ())
{
- i->second.handled = true;
- attr_unhandled_--;
+ if (!i->second.handled)
+ {
+ i->second.handled = true;
+ e->attr_unhandled_--;
+ }
+ return i->second.value;
}
- return i->second.value;
}
- else
- return dv;
+
+ return dv;
}
void parser::
@@ -247,8 +253,55 @@ namespace cutl
qname_type (ns, n).string () + "' expected");
}
+ const parser::element_entry* parser::
+ get_element () const
+ {
+ // Return the entry whose depth equals depth_, or 0 if there is none.
+ // The start_element_() Expat handler may have already provisioned an
+ // entry one level deeper; in this case, check the one before it.
+ //
+ const element_entry* r (0);
+ element_state::size_type n (element_state_.size ());
+ if (n != 0)
+ {
+ n--; // Index of the top entry.
+ if (element_state_[n].depth == depth_)
+ r = &element_state_[n];
+ else if (n != 0 && element_state_[n].depth > depth_)
+ {
+ n--; // Step back past the provisioned (deeper) entry.
+ if (element_state_[n].depth == depth_)
+ r = &element_state_[n];
+ }
+ }
+ return r;
+ }
+
+ void parser::
+ pop_element ()
+ {
+ // Make sure the top entry has no unhandled attributes, then pop it.
+ //
+ const element_entry& e (element_state_.back ()); // Stack must be non-empty.
+ if (e.attr_unhandled_ != 0)
+ {
+ // Find the first unhandled attribute (in map order) and report it.
+ //
+ for (attribute_map::const_iterator i (e.attr_map_.begin ());
+ i != e.attr_map_.end (); ++i)
+ {
+ if (!i->second.handled)
+ throw parsing (
+ *this, "unexpected attribute '" + i->first.string () + "'");
+ }
+ assert (false); // Counter was non-zero, so the loop should have thrown.
+ }
+
+ element_state_.pop_back ();
+ }
+
parser::event_type parser::
- next_ ()
+ next_ (bool peek)
{
event_type e (next_body ());
@@ -261,9 +314,26 @@ namespace cutl
//
switch (e)
{
+ case end_element:
+ {
+ // If this is a peek, then avoid popping the stack just yet.
+ // This way, the attribute map will still be valid until we
+ // call next().
+ //
+ if (!peek)
+ {
+ if (!element_state_.empty () &&
+ element_state_.back ().depth == depth_)
+ pop_element ();
+
+ depth_--;
+ }
+ break;
+ }
case start_element:
{
- switch (content ())
+ const element_entry* e (get_element ());
+ switch (e != 0 ? e->content : mixed)
{
case empty:
throw parsing (*this, "element in empty content");
@@ -273,15 +343,11 @@ namespace cutl
break;
}
- depth_++;
- break;
- }
- case end_element:
- {
- if (!content_.empty () && content_.back ().depth == depth_)
- content_.pop_back ();
+ // If this is a peek, then delay adjusting the depth.
+ //
+ if (!peek)
+ depth_++;
- depth_--;
break;
}
default:
@@ -294,29 +360,6 @@ namespace cutl
parser::event_type parser::
next_body ()
{
- // If the previous event is start_element and we return attributes
- // as a map, make sure there are no unhandled attributes left. Also
- // clear the map.
- //
- if (event_ == start_element && (feature_ & receive_attributes_map) != 0)
- {
- if (attr_unhandled_ != 0)
- {
- // Find the first unhandled attribute and report it.
- //
- for (attribute_map::const_iterator i (attr_map_.begin ());
- i != attr_map_.end (); ++i)
- {
- if (!i->second.handled)
- throw parsing (
- *this, "unexpected attribute '" + i->first.string () + "'");
- }
- assert (false);
- }
-
- attr_map_.clear ();
- }
-
// See if we have any start namespace declarations we need to return.
//
if (start_ns_i_ < start_ns_.size ())
@@ -576,31 +619,44 @@ namespace cutl
// Handle attributes.
//
- bool am ((p.feature_ & receive_attributes_map) != 0);
- bool ae ((p.feature_ & receive_attributes_event) != 0);
- if (am || ae)
+ if (*atts != 0)
{
- for (; *atts != 0; atts += 2)
+ bool am ((p.feature_ & receive_attributes_map) != 0);
+ bool ae ((p.feature_ & receive_attributes_event) != 0);
+
+ // Provision an entry for this element.
+ //
+ element_entry* pe (0);
+ if (am)
{
- if (am)
- {
- qname_type qn;
- split_name (*atts, qn);
- attribute_map::value_type v (qn, attribute_value ());
- v.second.value = *(atts + 1);
- v.second.handled = false;
- p.attr_map_.insert (v);
- }
- else
+ p.element_state_.push_back (element_entry (p.depth_ + 1));
+ pe = &p.element_state_.back ();
+ }
+
+ if (am || ae)
+ {
+ for (; *atts != 0; atts += 2)
{
- p.attr_.push_back (attribute_type ());
- split_name (*atts, p.attr_.back ().qname);
- p.attr_.back ().value = *(atts + 1);
+ if (am)
+ {
+ qname_type qn;
+ split_name (*atts, qn);
+ attribute_map::value_type v (qn, attribute_value ());
+ v.second.value = *(atts + 1);
+ v.second.handled = false;
+ pe->attr_map_.insert (v);
+ }
+ else
+ {
+ p.attr_.push_back (attribute_type ());
+ split_name (*atts, p.attr_.back ().qname);
+ p.attr_.back ().value = *(atts + 1);
+ }
}
- }
- if (am)
- p.attr_unhandled_ = p.attr_map_.size ();
+ if (am)
+ pe->attr_unhandled_ = pe->attr_map_.size ();
+ }
}
XML_StopParser (p.p_, true);
@@ -663,7 +719,7 @@ namespace cutl
if (ps.parsing == XML_FINISHED)
return;
- // If this is empty of complex content, see if these are whitespaces.
+ // If this is empty or complex content, see if these are whitespaces.
//
switch (p.content ())
{
diff --git a/cutl/xml/parser.hxx b/cutl/xml/parser.hxx
index c84268a..8aed55f 100644
--- a/cutl/xml/parser.hxx
+++ b/cutl/xml/parser.hxx
@@ -10,6 +10,7 @@
#include <string>
#include <iosfwd>
#include <cstddef> // std::size_t
+#include <cassert>
#include <cutl/details/config.hxx> // LIBCUTL_EXTERNAL_EXPAT
@@ -128,9 +129,32 @@ namespace cutl
next ()
{
if (state_ == state_next)
- return next_ ();
+ return next_ (false);
else
{
+ // If we previously peeked at start/end_element, then adjust
+ // state accordingly.
+ //
+ switch (event_)
+ {
+ case end_element:
+ {
+ if (!element_state_.empty () &&
+ element_state_.back ().depth == depth_)
+ pop_element ();
+
+ depth_--;
+ break;
+ }
+ case start_element:
+ {
+ depth_++;
+ break;
+ }
+ default:
+ break;
+ }
+
state_ = state_next;
return event_;
}
@@ -158,8 +182,9 @@ namespace cutl
return event_;
else
{
- state_ = state_peek;
- return next_ ();
+ event_type e (next_ (true));
+ state_ = state_peek; // Set it after the call to next_().
+ return e;
}
}
@@ -191,6 +216,11 @@ namespace cutl
// would conflict with attribute(name,dv) (qualified attributes
// are not very common).
//
+ // The attribute map is valid throughout the "element level", that is,
+ // until end_element and not just during start_element. As a special
+ // case, the map is still valid after a peek() that returned end_element
+ // until this end_element event is retrieved with next().
+ //
const std::string&
attribute (const std::string& name) const;
@@ -231,20 +261,28 @@ namespace cutl
mixed // yes yes preserved
};
+ // Note that you cannot get/set content while peeking.
+ //
void
content (content_type c)
{
- if (!content_.empty () && content_.back ().depth == depth_)
- content_.back ().content = c;
+ assert (state_ == state_next);
+
+ if (!element_state_.empty () && element_state_.back ().depth == depth_)
+ element_state_.back ().content = c;
else
- content_.push_back (content_entry (depth_, c));
+ element_state_.push_back (element_entry (depth_, c));
}
content_type
content () const
{
- return !content_.empty () && content_.back ().depth == depth_
- ? content_.back ().content : mixed;
+ assert (state_ == state_next);
+
+ return
+ !element_state_.empty () && element_state_.back ().depth == depth_
+ ? element_state_.back ().content
+ : mixed;
}
private:
@@ -265,7 +303,7 @@ namespace cutl
private:
event_type
- next_ ();
+ next_ (bool peek);
event_type
next_body ();
@@ -296,18 +334,6 @@ namespace cutl
unsigned long long line_;
unsigned long long column_;
- // Attributes as a map.
- //
- struct attribute_value
- {
- std::string value;
- mutable bool handled;
- };
-
- typedef std::map<qname_type, attribute_value> attribute_map;
- attribute_map attr_map_;
- mutable attribute_map::size_type attr_unhandled_;
-
// Attributes as events.
//
struct attribute_type
@@ -331,18 +357,40 @@ namespace cutl
namespace_decls end_ns_;
namespace_decls::size_type end_ns_i_; // Index of the current decl.
- // Content.
+ // Attributes as a map.
+ //
+ struct attribute_value
+ {
+ std::string value;
+ mutable bool handled;
+ };
+
+ typedef std::map<qname_type, attribute_value> attribute_map;
+
+ // Element state consisting of the content model and attribute map.
//
- struct content_entry
+ struct element_entry
{
- content_entry (std::size_t d, content_type c)
- : depth (d), content (c) {}
+ element_entry (std::size_t d, content_type c = mixed)
+ : depth (d), content (c), attr_unhandled_ (0) {}
std::size_t depth;
content_type content;
+ attribute_map attr_map_;
+ mutable attribute_map::size_type attr_unhandled_;
};
- std::vector<content_entry> content_;
+ typedef std::vector<element_entry> element_state;
+ std::vector<element_entry> element_state_;
+
+ // Return the element entry corresponding to the current depth, if
+ // one exists, and NULL otherwise.
+ //
+ const element_entry*
+ get_element () const;
+
+ void
+ pop_element ();
};
LIBCUTL_EXPORT
diff --git a/cutl/xml/parser.txx b/cutl/xml/parser.txx
index cf27f2c..e5286f8 100644
--- a/cutl/xml/parser.txx
+++ b/cutl/xml/parser.txx
@@ -12,19 +12,22 @@ namespace cutl
T parser::
attribute (const qname_type& qn, const T& dv) const
{
- attribute_map::const_iterator i (attr_map_.find (qn));
-
- if (i != attr_map_.end ())
+ if (const element_entry* e = get_element ())
{
- if (!i->second.handled)
+ attribute_map::const_iterator i (e->attr_map_.find (qn));
+
+ if (i != e->attr_map_.end ())
{
- i->second.handled = true;
- attr_unhandled_--;
+ if (!i->second.handled)
+ {
+ i->second.handled = true;
+ e->attr_unhandled_--;
+ }
+ return value_traits<T>::parse (i->second.value, *this);
}
- return value_traits<T>::parse (i->second.value, *this);
}
- else
- return dv;
+
+ return dv;
}
}
}
diff --git a/tests/xml/parser/driver.cxx b/tests/xml/parser/driver.cxx
index 4c85b77..39d5994 100644
--- a/tests/xml/parser/driver.cxx
+++ b/tests/xml/parser/driver.cxx
@@ -98,6 +98,27 @@ main ()
p.next_expect (parser::end_element);
}
+ { // Attribute map lifetime across nested elements and peek().
+ istringstream is ("<root a='a'><nested a='A'><inner/></nested></root>");
+ parser p (is, "test");
+ p.next_expect (parser::start_element, "root");
+ assert (p.attribute ("a") == "a");
+ assert (p.peek () == parser::start_element && p.name () == "nested");
+ assert (p.attribute ("a") == "a"); // Only peeked: still root's map.
+ p.next_expect (parser::start_element, "nested");
+ assert (p.attribute ("a") == "A");
+ p.next_expect (parser::start_element, "inner");
+ assert (p.attribute ("a", "") == ""); // inner has no attributes.
+ p.next_expect (parser::end_element);
+ assert (p.attribute ("a") == "A"); // Back at nested's level.
+ assert (p.peek () == parser::end_element);
+ assert (p.attribute ("a") == "A"); // Still valid.
+ p.next_expect (parser::end_element);
+ assert (p.attribute ("a") == "a"); // Back at root's level.
+ p.next_expect (parser::end_element);
+ assert (p.attribute ("a", "") == ""); // root has ended: default is returned.
+ }
+
try
{
istringstream is ("<root a='a' b='b'/>");