aboutsummaryrefslogtreecommitdiff
path: root/cutl/xml/parser.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'cutl/xml/parser.cxx')
-rw-r--r--cutl/xml/parser.cxx826
1 files changed, 0 insertions, 826 deletions
diff --git a/cutl/xml/parser.cxx b/cutl/xml/parser.cxx
deleted file mode 100644
index 11e3e97..0000000
--- a/cutl/xml/parser.cxx
+++ /dev/null
@@ -1,826 +0,0 @@
-// file : cutl/xml/parser.cxx
-// license : MIT; see accompanying LICENSE file
-
-#include <new> // std::bad_alloc
-#include <cassert>
-#include <cstring> // std::strchr
-#include <istream>
-#include <ostream>
-#include <sstream>
-
-#include <cutl/xml/parser.hxx>
-
-using namespace std;
-
-namespace cutl
-{
- namespace xml
- {
- // parsing
- //
- // Out-of-line, non-throwing destructor; it has no work of its own.
- //
- parsing::
- ~parsing () throw ()
- {
- }
-
- // Construct from an explicit input name (n), line (l), column (c), and
- // error description (d), then pre-format the what() message.
- //
- parsing::
- parsing (const string& n,
-          unsigned long long l,
-          unsigned long long c,
-          const string& d)
-   : name_ (n),
-     line_ (l),
-     column_ (c),
-     description_ (d)
- {
-   init ();
- }
-
- // Construct from a parser, taking the input name and the line/column
- // from it, plus the error description (d). Pre-formats the what()
- // message.
- //
- parsing::
- parsing (const parser& p, const std::string& d)
-   : name_ (p.input_name ()), line_ (p.line ()), column_ (p.column ()),
-     description_ (d)
- {
-   init ();
- }
-
- // Build the what() text in the form
- //
- //   [<name>:]<line>:<column>: error: <description>
- //
- // where the name part is omitted if the input name is empty.
- //
- void parsing::
- init ()
- {
-   ostringstream msg;
-
-   if (!name_.empty ())
-   {
-     msg << name_;
-     msg << ':';
-   }
-
-   msg << line_ << ':' << column_;
-   msg << ": error: " << description_;
-
-   what_ = msg.str ();
- }
-
- // Return the message pre-formatted by init(). The pointer refers to
- // storage owned by this exception object.
- //
- char const* parsing::
- what () const throw ()
- {
-   const char* text (what_.c_str ());
-   return text;
- }
-
- // parser::event_type
- //
- // Human-readable event names. The table is indexed directly with a
- // parser::event_type value (see operator<< and next_expect() below), so
- // the entries presumably have to stay in the same order as the
- // enumerators declared in parser.hxx -- verify when changing either.
- //
- static const char* parser_event_str[] =
- {
- "start element",
- "end element",
- "start attribute",
- "end attribute",
- "characters",
- "start namespace declaration",
- "end namespace declaration",
- "end of file"
- };
-
- // Print the human-readable name of the event (looked up in
- // parser_event_str) and return the stream to allow chaining.
- //
- ostream&
- operator<< (ostream& os, parser::event_type e)
- {
-   const char* text (parser_event_str[e]);
-   os << text;
-   return os;
- }
-
- // parser
- //
- // Release the underlying Expat parser, if one was allocated.
- //
- parser::
- ~parser ()
- {
-   if (p_ == 0)
-     return;
-
-   XML_ParserFree (p_);
- }
-
- // Create a parser that reads XML from the stream is. The iname argument
- // is stored as the input name (it is used by handle_error() when
- // constructing parsing exceptions) and f is the set of feature flags
- // that selects which Expat handlers get installed.
- //
- // Throws std::bad_alloc if the Expat parser cannot be allocated.
- //
- parser::
- parser (istream& is, const string& iname, feature_type f)
- : is_ (is), iname_ (iname), feature_ (f),
- depth_ (0), state_ (state_next), event_ (eof), queue_ (eof),
- pqname_ (&qname_), pvalue_ (&value_),
- attr_i_ (0), start_ns_i_ (0), end_ns_i_ (0)
- {
- // If both attribute delivery modes are requested, attribute events
- // win and the map flag is dropped.
- //
- if ((feature_ & receive_attributes_map) != 0 &&
- (feature_ & receive_attributes_event) != 0)
- feature_ &= ~receive_attributes_map;
-
- // Allocate the parser. Make sure nothing else can throw after
- // this call since otherwise we will leak it.
- //
- // The space is used as the namespace/name/prefix separator; this is
- // the character split_name() looks for.
- //
- p_ = XML_ParserCreateNS (0, XML_Char (' '));
-
- if (p_ == 0)
- throw bad_alloc ();
-
- // Get prefixes in addition to namespaces and local names.
- //
- XML_SetReturnNSTriplet (p_, true);
-
- // Set handlers.
- //
- // The user data is this object; each handler casts it back to parser.
- //
- XML_SetUserData(p_, this);
-
- if ((f & receive_elements) != 0)
- {
- XML_SetStartElementHandler (p_, &start_element_);
- XML_SetEndElementHandler (p_, &end_element_);
- }
-
- if ((f & receive_characters) != 0)
- XML_SetCharacterDataHandler (p_, &characters_);
-
- if ((f & receive_namespace_decls) != 0)
- XML_SetNamespaceDeclHandler (p_,
- &start_namespace_decl_,
- &end_namespace_decl_);
- }
-
- void parser::
- handle_error ()
- {
- XML_Error e (XML_GetErrorCode (p_));
-
- if (e == XML_ERROR_ABORTED)
- {
- // For now we only abort the parser in the characters_() handler.
- //
- switch (content ())
- {
- case empty:
- throw parsing (*this, "character in empty content");
- case complex:
- throw parsing (*this, "character in complex content");
- default:
- assert (false);
- }
- }
- else
- throw parsing (iname_,
- XML_GetCurrentLineNumber (p_),
- XML_GetCurrentColumnNumber (p_),
- XML_ErrorString (e));
- }
-
- // Scope guard that removes failbit from the stream's exception mask for
- // its lifetime and restores the original mask on destruction.
- //
- // It is used in next_body() around istream::read(), where a short read
- // at end of input sets failbit together with eofbit.
- //
- struct stream_exception_controller
- {
- ~stream_exception_controller ()
- {
- // Current error state with failbit masked out.
- //
- istream::iostate s = is_.rdstate ();
- s &= ~istream::failbit;
-
- // If our error state (sans failbit) intersects with the
- // exception state then that means we have an active
- // exception and changing error/exception state will
- // cause another to be thrown.
- //
- if (!(old_state_ & s))
- {
- // Clear failbit if it was caused by eof.
- //
- if (is_.fail () && is_.eof ())
- is_.clear (s);
-
- // Restore the exception mask saved by the constructor.
- //
- is_.exceptions (old_state_);
- }
- }
-
- // Save the current exception mask and install it with failbit removed.
- //
- stream_exception_controller (istream& is)
- : is_ (is), old_state_ (is_.exceptions ())
- {
- is_.exceptions (old_state_ & ~istream::failbit);
- }
-
- // Declared private with no definition visible here: not copyable.
- //
- private:
- stream_exception_controller (const stream_exception_controller&);
-
- stream_exception_controller&
- operator= (const stream_exception_controller&);
-
- private:
- istream& is_;
- istream::iostate old_state_; // Exception mask in effect on construction.
- };
-
- // Return the value of attribute qn of the current element and mark the
- // attribute as handled. Throws parsing if there is no current element
- // entry or it has no such attribute.
- //
- const string& parser::
- attribute (const qname_type& qn) const
- {
-   const element_entry* entry (get_element ());
-
-   if (entry != 0)
-   {
-     attribute_map_type::const_iterator pos (entry->attr_map_.find (qn));
-
-     if (pos != entry->attr_map_.end ())
-     {
-       // First access: update the count that pop_element() checks.
-       //
-       if (!pos->second.handled)
-       {
-         pos->second.handled = true;
-         entry->attr_unhandled_--;
-       }
-
-       return pos->second.value;
-     }
-   }
-
-   throw parsing (*this, "attribute '" + qn.string () + "' expected");
- }
-
- // Return the value of attribute qn of the current element and mark the
- // attribute as handled. If there is no current element entry or it has
- // no such attribute, return the default value dv instead.
- //
- string parser::
- attribute (const qname_type& qn, const string& dv) const
- {
-   const element_entry* entry (get_element ());
-
-   if (entry == 0)
-     return dv;
-
-   attribute_map_type::const_iterator pos (entry->attr_map_.find (qn));
-
-   if (pos == entry->attr_map_.end ())
-     return dv;
-
-   // First access: update the count that pop_element() checks.
-   //
-   if (!pos->second.handled)
-   {
-     pos->second.handled = true;
-     entry->attr_unhandled_--;
-   }
-
-   return pos->second.value;
- }
-
- // Return true if the current element has attribute qn. A present
- // attribute is also marked as handled.
- //
- bool parser::
- attribute_present (const qname_type& qn) const
- {
-   const element_entry* entry (get_element ());
-
-   if (entry == 0)
-     return false;
-
-   attribute_map_type::const_iterator pos (entry->attr_map_.find (qn));
-
-   if (pos == entry->attr_map_.end ())
-     return false;
-
-   // First access: update the count that pop_element() checks.
-   //
-   if (!pos->second.handled)
-   {
-     pos->second.handled = true;
-     entry->attr_unhandled_--;
-   }
-
-   return true;
- }
-
- // Advance to the next event and throw parsing unless it is e.
- //
- void parser::
- next_expect (event_type e)
- {
-   const event_type got (next ());
-
-   if (got == e)
-     return;
-
-   throw parsing (*this, string (parser_event_str[e]) + " expected");
- }
-
- // Advance to the next event and throw parsing unless it is e with the
- // qualified name ns/n.
- //
- void parser::
- next_expect (event_type e, const string& ns, const string& n)
- {
-   // The name is only inspected if the event itself matches.
-   //
-   if (next () == e && namespace_ () == ns && name () == n)
-     return;
-
-   const string expected (qname_type (ns, n).string ());
-
-   throw parsing (*this,
-                  string (parser_event_str[e]) + " '" + expected +
-                  "' expected");
- }
-
- // Return the element stack entry whose depth equals the current depth,
- // or 0 if there is none.
- //
- const parser::element_entry* parser::
- get_element () const
- {
-   const element_state::size_type n (element_state_.size ());
-
-   if (n == 0)
-     return 0;
-
-   const element_entry& top (element_state_[n - 1]);
-
-   if (top.depth == depth_)
-     return &top;
-
-   // The start_element_() Expat handler may have already provisioned
-   // an entry in the element stack. In this case, we need to get the
-   // one before it, if any.
-   //
-   if (n > 1 && top.depth > depth_)
-   {
-     const element_entry& prev (element_state_[n - 2]);
-
-     if (prev.depth == depth_)
-       return &prev;
-   }
-
-   return 0;
- }
-
- // Pop the top entry off the element stack. Throws parsing, naming the
- // first attribute found in map order, if any attribute of that element
- // was never handled.
- //
- void parser::
- pop_element ()
- {
-   const element_entry& top (element_state_.back ());
-
-   if (top.attr_unhandled_ != 0)
-   {
-     // Skip over the handled attributes.
-     //
-     attribute_map_type::const_iterator it (top.attr_map_.begin ());
-     const attribute_map_type::const_iterator last (top.attr_map_.end ());
-
-     while (it != last && it->second.handled)
-       ++it;
-
-     if (it != last)
-       throw parsing (
-         *this, "unexpected attribute '" + it->first.string () + "'");
-
-     // The counter said there was one, so the map must have it.
-     //
-     assert (false);
-   }
-
-   element_state_.pop_back ();
- }
-
- // Get the next event from next_body() and apply the element depth and
- // content bookkeeping to it. If peek is true, the depth and the element
- // stack are left untouched.
- //
- // Throws parsing if a start element is encountered inside an element
- // with empty or simple content.
- //
- parser::event_type parser::
- next_ (bool peek)
- {
- event_type e (next_body ());
-
- // Content-specific processing. Note that we handle characters in the
- // characters_() Expat handler for two reasons. Firstly, it is faster
- // to ignore the whitespaces at the source. Secondly, this allows us
- // to distinguish between element and attribute characters. We can
- // move this processing to the handler because the characters event
- // is never queued.
- //
- switch (e)
- {
- case end_element:
- {
- // If this is a peek, then avoid popping the stack just yet.
- // This way, the attribute map will still be valid until we
- // call next().
- //
- if (!peek)
- {
- // Only pop if the top entry belongs to the element being closed
- // (pop_element() also checks for unhandled attributes).
- //
- if (!element_state_.empty () &&
- element_state_.back ().depth == depth_)
- pop_element ();
-
- depth_--;
- }
- break;
- }
- case start_element:
- {
- // Note: this e shadows the event e for the rest of this case. With
- // no entry for the enclosing element the content is treated as
- // mixed.
- //
- const element_entry* e (get_element ());
- switch (e != 0 ? e->content : mixed)
- {
- case empty:
- throw parsing (*this, "element in empty content");
- case simple:
- throw parsing (*this, "element in simple content");
- default:
- break;
- }
-
- // If this is a peek, then delay adjusting the depth.
- //
- if (!peek)
- depth_++;
-
- break;
- }
- default:
- break;
- }
-
- return e;
- }
-
- // Produce the next raw event.
- //
- // Pending events are returned first, in this order: start namespace
- // declarations, attributes (as start_attribute/characters/end_attribute
- // triples), end namespace declarations, and the single queued event.
- // After that the suspended Expat parser is resumed and, if it needs
- // more data, the input stream is read and parsed in chunks until a
- // handler suspends the parser again or the input ends.
- //
- // Returns eof once the input is exhausted. Throws parsing on XML errors
- // (via handle_error()) and on stream read failures, and std::bad_alloc
- // if Expat cannot provide a buffer.
- //
- parser::event_type parser::
- next_body ()
- {
-   // See if we have any start namespace declarations we need to return.
-   //
-   if (start_ns_i_ < start_ns_.size ())
-   {
-     // Based on the previous event determine what's the next one must be.
-     //
-     switch (event_)
-     {
-     case start_namespace_decl:
-       {
-         if (++start_ns_i_ == start_ns_.size ())
-         {
-           start_ns_i_ = 0;
-           start_ns_.clear ();
-           pqname_ = &qname_;
-           break; // No more declarations.
-         }
-         // Fall through.
-       }
-     case start_element:
-       {
-         event_ = start_namespace_decl;
-         pqname_ = &start_ns_[start_ns_i_];
-         return event_;
-       }
-     default:
-       {
-         assert (false);
-         return event_ = eof;
-       }
-     }
-   }
-
-   // See if we have any attributes we need to return as events.
-   //
-   if (attr_i_ < attr_.size ())
-   {
-     // Based on the previous event determine what's the next one must be.
-     //
-     switch (event_)
-     {
-     case start_attribute:
-       {
-         event_ = characters;
-         pvalue_ = &attr_[attr_i_].value;
-         return event_;
-       }
-     case characters:
-       {
-         event_ = end_attribute; // Name is already set.
-         return event_;
-       }
-     case end_attribute:
-       {
-         if (++attr_i_ == attr_.size ())
-         {
-           attr_i_ = 0;
-           attr_.clear ();
-           pqname_ = &qname_;
-           pvalue_ = &value_;
-           break; // No more attributes.
-         }
-         // Fall through.
-       }
-     case start_element:
-     case start_namespace_decl:
-       {
-         event_ = start_attribute;
-         pqname_ = &attr_[attr_i_].qname;
-         return event_;
-       }
-     default:
-       {
-         assert (false);
-         return event_ = eof;
-       }
-     }
-   }
-
-   // See if we have any end namespace declarations we need to return.
-   //
-   if (end_ns_i_ < end_ns_.size ())
-   {
-     // Based on the previous event determine what's the next one must be.
-     //
-     switch (event_)
-     {
-     case end_namespace_decl:
-       {
-         if (++end_ns_i_ == end_ns_.size ())
-         {
-           end_ns_i_ = 0;
-           end_ns_.clear ();
-           pqname_ = &qname_;
-           break; // No more declarations.
-         }
-         // Fall through.
-       }
-     // The end namespace declaration comes before the end element
-     // which means it can follow pretty much any other event.
-     //
-     default:
-       {
-         event_ = end_namespace_decl;
-         pqname_ = &end_ns_[end_ns_i_];
-         return event_;
-       }
-     }
-   }
-
-   // Check the queue.
-   //
-   if (queue_ != eof)
-   {
-     event_ = queue_;
-     queue_ = eof;
-     return event_;
-   }
-
-   XML_ParsingStatus ps;
-   XML_GetParsingStatus (p_, &ps);
-
-   switch (ps.parsing)
-   {
-   case XML_INITIALIZED:
-     {
-       // As if we finished the previous chunk.
-       break;
-     }
-   case XML_PARSING:
-     {
-       assert (false);
-       return event_ = eof;
-     }
-   case XML_FINISHED:
-     {
-       return event_ = eof;
-     }
-   case XML_SUSPENDED:
-     {
-       switch (XML_ResumeParser (p_))
-       {
-       case XML_STATUS_SUSPENDED:
-         {
-           // If the parser is again in the suspended state, then
-           // that means we have the next event.
-           //
-           return event_;
-         }
-       case XML_STATUS_OK:
-         {
-           // Otherwise, we need to get and parse the next chunk of data
-           // unless this was the last chunk, in which case this is eof.
-           //
-           if (ps.finalBuffer)
-             return event_ = eof;
-
-           break;
-         }
-       case XML_STATUS_ERROR:
-         handle_error (); // Always throws.
-       }
-
-       break;
-     }
-   }
-
-   // Get and parse the next chunk of data until we get the next event
-   // or reach eof.
-   //
-   event_ = eof;
-   XML_Status s;
-   do
-   {
-     const size_t cap (4096);
-
-     char* b (static_cast<char*> (XML_GetBuffer (p_, cap)));
-     if (b == 0)
-       throw bad_alloc ();
-
-     // Temporarily unset the exception failbit. Also clear the fail bit
-     // when we reset the old state if it was caused by eof.
-     //
-     {
-       stream_exception_controller sec (is_);
-       is_.read (b, static_cast<streamsize> (cap));
-     }
-
-     // If the caller hasn't configured the stream to throw, a failed
-     // read would otherwise go unnoticed: it yields no data and no eof,
-     // so this loop would never terminate. Report it as a parsing error
-     // instead.
-     //
-     if (is_.bad () || (is_.fail () && !is_.eof ()))
-       throw parsing (*this, "io failure");
-
-     const bool last (is_.eof ());
-
-     s = XML_ParseBuffer (p_, static_cast<int> (is_.gcount ()), last);
-
-     if (s == XML_STATUS_ERROR)
-       handle_error ();
-
-   } while (s != XML_STATUS_SUSPENDED && !is_.eof ());
-
-   return event_;
- }
-
- // Split an Expat name into the namespace, local name, and prefix parts
- // of qn. The parts are separated by a space (the separator passed to
- // XML_ParserCreateNS()):
- //
- //   "name"              -> name only; namespace and prefix are cleared
- //   "ns name"           -> namespace and name; prefix is cleared
- //   "ns name prefix"    -> all three
- //
- static void
- split_name (const XML_Char* s, qname& qn)
- {
-   string& ns (qn.namespace_ ());
-   string& name (qn.name ());
-   string& prefix (qn.prefix ());
-
-   const char* p (strchr (s, ' '));
-
-   if (p == 0)
-   {
-     ns.clear ();
-     name = s;
-     prefix.clear ();
-   }
-   else
-   {
-     // Use the (pointer, count) overload. The (s, 0, n) form would first
-     // convert s to a temporary std::string and then copy a substring
-     // out of it.
-     //
-     ns.assign (s, static_cast<string::size_type> (p - s));
-
-     s = p + 1;
-     p = strchr (s, ' ');
-
-     if (p == 0)
-     {
-       name = s;
-       prefix.clear ();
-     }
-     else
-     {
-       name.assign (s, static_cast<string::size_type> (p - s));
-       prefix = p + 1;
-     }
-   }
- }
-
- // Expat start element handler. v is the parser object registered with
- // XML_SetUserData(), name is the element name in the form understood by
- // split_name(), and atts is a null-terminated array of alternating
- // attribute names and values.
- //
- // Records the start_element event, its name, and position, collects
- // the attributes according to the feature flags, and then suspends the
- // parser so that the event can be returned to the caller.
- //
- void XMLCALL parser::
- start_element_ (void* v, const XML_Char* name, const XML_Char** atts)
- {
- parser& p (*static_cast<parser*> (v));
-
- XML_ParsingStatus ps;
- XML_GetParsingStatus (p.p_, &ps);
-
- // Expat has a (mis)-feature of possibly calling handlers even
- // after the non-resumable XML_StopParser call.
- //
- if (ps.parsing == XML_FINISHED)
- return;
-
- // Cannot be a followup event.
- //
- assert (ps.parsing == XML_PARSING);
-
- p.event_ = start_element;
- split_name (name, p.qname_);
-
- p.line_ = XML_GetCurrentLineNumber (p.p_);
- p.column_ = XML_GetCurrentColumnNumber (p.p_);
-
- // Handle attributes.
- //
- if (*atts != 0)
- {
- // am: collect into the element's attribute map;
- // ae: queue for delivery as attribute events.
- //
- bool am ((p.feature_ & receive_attributes_map) != 0);
- bool ae ((p.feature_ & receive_attributes_event) != 0);
-
- // Provision an entry for this element.
- //
- // Its depth is one more than the current depth since the depth is
- // only adjusted later, in next_().
- //
- element_entry* pe (0);
- if (am)
- {
- p.element_state_.push_back (element_entry (p.depth_ + 1));
- pe = &p.element_state_.back ();
- }
-
- if (am || ae)
- {
- for (; *atts != 0; atts += 2)
- {
- if (am)
- {
- // Note: this v shadows the handler's v argument.
- //
- qname_type qn;
- split_name (*atts, qn);
- attribute_map_type::value_type v (qn, attribute_value_type ());
- v.second.value = *(atts + 1);
- v.second.handled = false;
- pe->attr_map_.insert (v);
- }
- else
- {
- p.attr_.push_back (attribute_type ());
- split_name (*atts, p.attr_.back ().qname);
- p.attr_.back ().value = *(atts + 1);
- }
- }
-
- // Initially every attribute is unhandled.
- //
- if (am)
- pe->attr_unhandled_ = pe->attr_map_.size ();
- }
- }
-
- // Suspend (resumable) so that next_body() can return this event.
- //
- XML_StopParser (p.p_, true);
- }
-
- // Expat end element handler. v is the parser object registered with
- // XML_SetUserData() and name is the element name in the form understood
- // by split_name().
- //
- // Either records end_element as the current event and suspends the
- // parser, or, if the parser is already stopped or end namespace
- // declarations are pending, queues end_element to be returned later.
- //
- void XMLCALL parser::
- end_element_ (void* v, const XML_Char* name)
- {
- parser& p (*static_cast<parser*> (v));
-
- XML_ParsingStatus ps;
- XML_GetParsingStatus (p.p_, &ps);
-
- // Expat has a (mis)-feature of possibly calling handlers even
- // after the non-resumable XML_StopParser call.
- //
- if (ps.parsing == XML_FINISHED)
- return;
-
- // This can be a followup event for empty elements (<foo/>). In this
- // case the element name is already set.
- //
- if (ps.parsing != XML_PARSING)
- p.queue_ = end_element;
- else
- {
- // We may also have the end namespace declaration events which
- // should come before the end element. If that's the case, then
- // queue the end element and return the end namespace as the next
- // event.
- //
- if (p.end_ns_i_ < p.end_ns_.size ())
- {
- p.event_ = end_namespace_decl;
- p.queue_ = end_element;
- }
- else
- p.event_ = end_element;
-
- split_name (name, p.qname_);
-
- p.line_ = XML_GetCurrentLineNumber (p.p_);
- p.column_ = XML_GetCurrentColumnNumber (p.p_);
-
- // Suspend (resumable) so that next_body() can return this event.
- //
- XML_StopParser (p.p_, true);
- }
- }
-
- // Expat character data handler. v is the parser object registered with
- // XML_SetUserData() and s points to n characters of data (s is used
- // with an explicit length throughout).
- //
- // In empty and complex content whitespace is ignored and anything else
- // aborts the parse; handle_error() later turns the abort into a parsing
- // exception. Otherwise the data becomes (or is appended to) the value
- // of a characters event.
- //
- void XMLCALL parser::
- characters_ (void* v, const XML_Char* s, int n)
- {
- parser& p (*static_cast<parser*> (v));
-
- XML_ParsingStatus ps;
- XML_GetParsingStatus (p.p_, &ps);
-
- // Expat has a (mis)-feature of possibly calling handlers even
- // after the non-resumable XML_StopParser call.
- //
- if (ps.parsing == XML_FINISHED)
- return;
-
- // If this is empty or complex content, see if these are whitespaces.
- //
- switch (p.content ())
- {
- case empty:
- case complex:
- {
- for (int i (0); i != n; ++i)
- {
- // Skip space, line feed, carriage return, and tab.
- //
- char c (s[i]);
- if (c == 0x20 || c == 0x0A || c == 0x0D || c == 0x09)
- continue;
-
- // It would have been easier to throw the exception directly,
- // however, the Expat code is most likely not exception safe.
- //
- // Save the position and stop the parser (not resumable).
- //
- p.line_ = XML_GetCurrentLineNumber (p.p_);
- p.column_ = XML_GetCurrentColumnNumber (p.p_);
- XML_StopParser (p.p_, false);
- break;
- }
- // No characters event is produced for this content, whether the
- // data was all whitespace or the parser was stopped.
- //
- return;
- }
- default:
- break;
- }
-
- // This can be a followup event for another character event. In
- // this case simply append the data.
- //
- if (ps.parsing != XML_PARSING)
- {
- assert (p.event_ == characters);
- p.value_.append (s, n);
- }
- else
- {
- p.event_ = characters;
- p.value_.assign (s, n);
-
- p.line_ = XML_GetCurrentLineNumber (p.p_);
- p.column_ = XML_GetCurrentColumnNumber (p.p_);
-
- // Suspend (resumable) so that next_body() can return this event.
- //
- XML_StopParser (p.p_, true);
- }
- }
-
- // Expat start namespace declaration handler. Appends the declaration
- // (prefix and namespace, with null treated as the empty string) to the
- // list of pending start namespace declarations.
- //
- void XMLCALL parser::
- start_namespace_decl_ (void* v, const XML_Char* prefix, const XML_Char* ns)
- {
-   parser& self (*static_cast<parser*> (v));
-
-   XML_ParsingStatus status;
-   XML_GetParsingStatus (self.p_, &status);
-
-   // Expat has a (mis)-feature of possibly calling handlers even
-   // after the non-resumable XML_StopParser call.
-   //
-   if (status.parsing != XML_FINISHED)
-   {
-     self.start_ns_.push_back (qname_type ());
-
-     qname_type& decl (self.start_ns_.back ());
-     decl.prefix () = (prefix != 0 ? prefix : "");
-     decl.namespace_ () = (ns != 0 ? ns : "");
-   }
- }
-
- // Expat end namespace declaration handler. Appends the prefix (with
- // null treated as the empty string) to the list of pending end
- // namespace declarations.
- //
- void XMLCALL parser::
- end_namespace_decl_ (void* v, const XML_Char* prefix)
- {
-   parser& self (*static_cast<parser*> (v));
-
-   XML_ParsingStatus status;
-   XML_GetParsingStatus (self.p_, &status);
-
-   // Expat has a (mis)-feature of possibly calling handlers even
-   // after the non-resumable XML_StopParser call.
-   //
-   if (status.parsing != XML_FINISHED)
-   {
-     self.end_ns_.push_back (qname_type ());
-
-     qname_type& decl (self.end_ns_.back ());
-     decl.prefix () = (prefix != 0 ? prefix : "");
-   }
- }
- }
-}