aboutsummaryrefslogtreecommitdiff
path: root/cutl/xml
diff options
context:
space:
mode:
author Boris Kolpackov <boris@codesynthesis.com> 2013-03-08 17:41:00 +0200
committer Boris Kolpackov <boris@codesynthesis.com> 2013-03-08 17:41:00 +0200
commit 2ac86dfdafb9bf4d4e9252260a431755b56d8ebe (patch)
tree 30a19391906b913b8a0773c7fe5a4b046c0703c4 /cutl/xml
parent df1e7318da3320cfcfaee7f3c1f00a9eee7c9d25 (diff)
Add support for XML parsing and serialization
Diffstat (limited to 'cutl/xml')
-rw-r--r-- cutl/xml/exception.hxx 19
-rw-r--r-- cutl/xml/parser.cxx 636
-rw-r--r-- cutl/xml/parser.hxx 238
-rw-r--r-- cutl/xml/qname.cxx 22
-rw-r--r-- cutl/xml/qname.hxx 74
-rw-r--r-- cutl/xml/serializer.cxx 245
-rw-r--r-- cutl/xml/serializer.hxx 154
-rw-r--r-- cutl/xml/serializer.ixx 45
8 files changed, 1433 insertions, 0 deletions
diff --git a/cutl/xml/exception.hxx b/cutl/xml/exception.hxx
new file mode 100644
index 0000000..c4d3c0d
--- /dev/null
+++ b/cutl/xml/exception.hxx
@@ -0,0 +1,19 @@
+// file : cutl/xml/exception.hxx
+// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC
+// license : MIT; see accompanying LICENSE file
+
+#ifndef CUTL_XML_EXCEPTION_HXX
+#define CUTL_XML_EXCEPTION_HXX
+
+#include <cutl/exception.hxx>
+#include <cutl/details/export.hxx>
+
+namespace cutl
+{
+ namespace xml
+ {
+ // Common base of the exceptions declared in the cutl::xml namespace.
+ // It adds no members of its own to cutl::exception.
+ //
+ struct LIBCUTL_EXPORT exception: cutl::exception {};
+ }
+}
+
+#endif // CUTL_XML_EXCEPTION_HXX
diff --git a/cutl/xml/parser.cxx b/cutl/xml/parser.cxx
new file mode 100644
index 0000000..24a91ee
--- /dev/null
+++ b/cutl/xml/parser.cxx
@@ -0,0 +1,636 @@
+// file : cutl/xml/parser.cxx
+// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC
+// license : MIT; see accompanying LICENSE file
+
+#include <new> // std::bad_alloc
+#include <cassert>
+#include <cstring> // std::strchr
+#include <istream>
+#include <ostream>
+#include <sstream>
+
+#include <cutl/xml/parser.hxx>
+
+using namespace std;
+
+namespace cutl
+{
+ namespace xml
+ {
+ // parsing
+ //
+ parsing::
+ ~parsing () throw ()
+ {
+ }
+
+ // Store the individual components and pre-format the text returned
+ // by what() as [<name>:]<line>:<column>: error: <description>.
+ //
+ parsing::
+ parsing (const string& n,
+          unsigned long long l,
+          unsigned long long c,
+          const string& d)
+   : name_ (n), line_ (l), column_ (c), description_ (d)
+ {
+   ostringstream msg;
+
+   // The document name (and its separator) is omitted if it is empty.
+   //
+   if (n.size () != 0)
+     msg << n << ':';
+
+   msg << l << ':' << c << ": error: " << d;
+   what_ = msg.str ();
+ }
+
+ // Return the message formatted by the constructor.
+ //
+ const char* parsing::
+ what () const throw ()
+ {
+   return what_.c_str ();
+ }
+
+ // parser
+ //
+ // Free the underlying Expat parser, if there is one.
+ //
+ parser::
+ ~parser ()
+ {
+   if (p_ == 0)
+     return;
+
+   XML_ParserFree (p_);
+ }
+
+ // Create a parser that reads the document from is. The name is kept
+ // for use in diagnostics. The feature flags (f) select which Expat
+ // handlers are installed; receive_attributes is consulted later, in
+ // the start element handler.
+ //
+ // Throws std::bad_alloc if the Expat parser cannot be allocated.
+ //
+ parser::
+ parser (istream& is, const string& name, feature_type f)
+   : is_ (is), name_ (name), feature_ (f),
+     depth_ (0), event_ (eof), queue_ (eof),
+     pqname_ (&qname_), pvalue_ (&value_),
+     line_ (0), column_ (0), // Defined values for line()/column() before
+                             // the first event.
+     attr_i_ (0), start_ns_i_ (0), end_ns_i_ (0)
+ {
+   // Allocate the parser. Make sure nothing else can throw after
+   // this call since otherwise we will leak it.
+   //
+   p_ = XML_ParserCreateNS (0, XML_Char (' '));
+
+   if (p_ == 0)
+     throw bad_alloc ();
+
+   // Get prefixes in addition to namespaces and local names.
+   //
+   XML_SetReturnNSTriplet (p_, true);
+
+   // Set handlers. The parser object itself is the user data that is
+   // passed to each of them.
+   //
+   XML_SetUserData(p_, this);
+
+   if ((f & receive_elements) != 0)
+   {
+     XML_SetStartElementHandler (p_, &start_element_);
+     XML_SetEndElementHandler (p_, &end_element_);
+   }
+
+   if ((f & receive_characters) != 0)
+     XML_SetCharacterDataHandler (p_, &characters_);
+
+   if ((f & receive_namespace_decls) != 0)
+     XML_SetNamespaceDeclHandler (p_,
+                                  &start_namespace_decl_,
+                                  &end_namespace_decl_);
+ }
+
+ void parser::
+ handle_error ()
+ {
+ XML_Error e (XML_GetErrorCode (p_));
+
+ if (e == XML_ERROR_ABORTED)
+ {
+ // For now we only abort the parser in the characters_() handler.
+ //
+ switch (content ())
+ {
+ case empty:
+ throw parsing (name_, line_, column_, "character in empty content");
+ case complex:
+ throw parsing (name_, line_, column_, "character in complex content");
+ default:
+ assert (false);
+ }
+ }
+ else
+ throw parsing (name_,
+ XML_GetCurrentLineNumber (p_),
+ XML_GetCurrentColumnNumber (p_),
+ XML_ErrorString (e));
+ }
+
+ // Scope guard that removes failbit from the stream's exception mask
+ // for its lifetime. The constructor saves the current mask and the
+ // destructor puts it back, first clearing a failbit that is
+ // accompanied by eofbit.
+ //
+ struct stream_exception_controller
+ {
+ ~stream_exception_controller ()
+ {
+ // Current error state of the stream with failbit masked out.
+ //
+ istream::iostate s = is_.rdstate ();
+ s &= ~istream::failbit;
+
+ // If our error state (sans failbit) intersects with the
+ // exception state then that means we have an active
+ // exception and changing error/exception state will
+ // cause another to be thrown.
+ //
+ if (!(old_state_ & s))
+ {
+ // Clear failbit if it was caused by eof.
+ //
+ if (is_.fail () && is_.eof ())
+ is_.clear (s);
+
+ // NOTE(review): if failbit is still set here (failure without eof)
+ // and the saved mask includes failbit, this call throws from the
+ // destructor. Confirm this is acceptable for the targeted language
+ // standard (destructors are implicitly noexcept since C++11).
+ //
+ is_.exceptions (old_state_);
+ }
+ }
+
+ // Save the current exception mask and re-install it without failbit.
+ //
+ stream_exception_controller (istream& is)
+ : is_ (is), old_state_ (is_.exceptions ())
+ {
+ is_.exceptions (old_state_ & ~istream::failbit);
+ }
+
+ // Declared but not defined in this file: not copyable or assignable.
+ //
+ private:
+ stream_exception_controller (const stream_exception_controller&);
+
+ stream_exception_controller&
+ operator= (const stream_exception_controller&);
+
+ private:
+ istream& is_;
+ istream::iostate old_state_; // Exception mask saved by the constructor.
+ };
+
+ // Return the next event, enforcing the content model (if any) that
+ // was set for the current depth and maintaining the element depth.
+ //
+ parser::event_type parser::
+ next ()
+ {
+   const event_type ev (next_ ());
+
+   // Content-specific processing. Characters are checked in the
+   // characters_() Expat handler instead: it is faster to ignore
+   // whitespaces at the source and it lets us tell element characters
+   // from attribute characters. That is possible because the
+   // characters event is never queued.
+   //
+   if (ev == start_element)
+   {
+     const content_type ct (content ());
+
+     if (ct == empty)
+       throw parsing (name_, line_, column_, "element in empty content");
+
+     if (ct == simple)
+       throw parsing (name_, line_, column_, "element in simple content");
+
+     ++depth_;
+   }
+   else if (ev == end_element)
+   {
+     // Drop the content entry that belongs to the element being closed.
+     //
+     if (!content_.empty () && content_.back ().depth == depth_)
+       content_.pop_back ();
+
+     --depth_;
+   }
+
+   return ev;
+ }
+
+ // Produce the next raw event. Pending namespace declaration and
+ // attribute events that were collected by the Expat handlers are
+ // returned first, then a queued event, and only then is the Expat
+ // parser resumed or fed with more data from the stream.
+ //
+ // Throws parsing on XML errors and on stream failures that the stream
+ // itself did not report with an exception, and std::bad_alloc if
+ // Expat cannot allocate its buffer.
+ //
+ parser::event_type parser::
+ next_ ()
+ {
+   // See if we have any start namespace declarations we need to return.
+   //
+   if (start_ns_i_ < start_ns_.size ())
+   {
+     // Based on the previous event determine what the next one must be.
+     //
+     switch (event_)
+     {
+     case start_namespace_decl:
+       {
+         if (++start_ns_i_ == start_ns_.size ())
+         {
+           start_ns_i_ = 0;
+           start_ns_.clear ();
+           pqname_ = &qname_;
+           break; // No more declarations.
+         }
+         // Fall through.
+       }
+     case start_element:
+       {
+         event_ = start_namespace_decl;
+         pqname_ = &start_ns_[start_ns_i_];
+         return event_;
+       }
+     default:
+       {
+         assert (false);
+         return eof;
+       }
+     }
+   }
+
+   // See if we have any attributes we need to return.
+   //
+   if (attr_i_ < attr_.size ())
+   {
+     // Based on the previous event determine what the next one must be.
+     //
+     switch (event_)
+     {
+     case start_attribute:
+       {
+         event_ = characters;
+         pvalue_ = &attr_[attr_i_].value;
+         return event_;
+       }
+     case characters:
+       {
+         event_ = end_attribute; // Name is already set.
+         return event_;
+       }
+     case end_attribute:
+       {
+         if (++attr_i_ == attr_.size ())
+         {
+           attr_i_ = 0;
+           attr_.clear ();
+           pqname_ = &qname_;
+           pvalue_ = &value_;
+           break; // No more attributes.
+         }
+         // Fall through.
+       }
+     case start_element:
+     case start_namespace_decl:
+       {
+         event_ = start_attribute;
+         pqname_ = &attr_[attr_i_].qname;
+         return event_;
+       }
+     default:
+       {
+         assert (false);
+         return eof;
+       }
+     }
+   }
+
+   // See if we have any end namespace declarations we need to return.
+   //
+   if (end_ns_i_ < end_ns_.size ())
+   {
+     // Based on the previous event determine what the next one must be.
+     //
+     switch (event_)
+     {
+     case end_namespace_decl:
+       {
+         if (++end_ns_i_ == end_ns_.size ())
+         {
+           end_ns_i_ = 0;
+           end_ns_.clear ();
+           pqname_ = &qname_;
+           break; // No more declarations.
+         }
+         // Fall through.
+       }
+     // The end namespace declaration comes before the end element
+     // which means it can follow pretty much any other event.
+     //
+     default:
+       {
+         event_ = end_namespace_decl;
+         pqname_ = &end_ns_[end_ns_i_];
+         return event_;
+       }
+     }
+   }
+
+   // Check the queue.
+   //
+   if (queue_ != eof)
+   {
+     event_type r (queue_);
+     queue_ = eof;
+     return r;
+   }
+
+   XML_ParsingStatus ps;
+   XML_GetParsingStatus (p_, &ps);
+
+   switch (ps.parsing)
+   {
+   case XML_INITIALIZED:
+     {
+       // As if we finished the previous chunk.
+       break;
+     }
+   case XML_PARSING:
+     {
+       assert (false);
+       return eof;
+     }
+   case XML_FINISHED:
+     {
+       return eof;
+     }
+   case XML_SUSPENDED:
+     {
+       switch (XML_ResumeParser (p_))
+       {
+       case XML_STATUS_SUSPENDED:
+         {
+           // If the parser is again in the suspended state, then
+           // that means we have the next event.
+           //
+           return event_;
+         }
+       case XML_STATUS_OK:
+         {
+           // Otherwise, we need to get and parse the next chunk of data
+           // unless this was the last chunk, in which case this is eof.
+           //
+           if (ps.finalBuffer)
+             return eof;
+
+           break;
+         }
+       case XML_STATUS_ERROR:
+         handle_error ();
+       }
+
+       break;
+     }
+   }
+
+   // Get and parse the next chunk of data until we get the next event
+   // or reach eof.
+   //
+   event_ = eof;
+   XML_Status s;
+   do
+   {
+     const size_t cap (4096);
+
+     char* b (static_cast<char*> (XML_GetBuffer (p_, cap)));
+     if (b == 0)
+       throw bad_alloc ();
+
+     // Temporarily unset the exception failbit. Also clear the fail bit
+     // when we reset the old state if it was caused by eof.
+     //
+     {
+       stream_exception_controller sec (is_);
+       is_.read (b, static_cast<streamsize> (cap));
+     }
+
+     // If the caller hasn't configured the stream to throw, then a bad
+     // or failed (but not eof) stream would keep returning zero bytes
+     // without ever setting eof and we would loop forever. Report it
+     // with the parsing exception instead.
+     //
+     if (is_.bad () || (is_.fail () && !is_.eof ()))
+       throw parsing (name_,
+                      XML_GetCurrentLineNumber (p_),
+                      XML_GetCurrentColumnNumber (p_),
+                      "io failure");
+
+     s = XML_ParseBuffer (p_, static_cast<int> (is_.gcount ()), is_.eof ());
+
+     if (s == XML_STATUS_ERROR)
+       handle_error ();
+
+   } while (s != XML_STATUS_SUSPENDED && !is_.eof ());
+
+   return event_;
+ }
+
+ // Split a name as reported by Expat into the qname components. The
+ // parts are separated by a space: a name without a space is just the
+ // local name, one space separates the namespace from the local name,
+ // and a second space, if present, is followed by the prefix.
+ // Components that are absent are cleared.
+ //
+ static void
+ split_name (const XML_Char* s, qname& qn)
+ {
+   string& ns (qn.namespace_ ());
+   string& name (qn.name ());
+   string& prefix (qn.prefix ());
+
+   const char* p (strchr (s, ' '));
+
+   if (p == 0)
+   {
+     ns.clear ();
+     name = s;
+     prefix.clear ();
+   }
+   else
+   {
+     // Use the (pointer, count) overload. The (s, 0, n) form would first
+     // copy the whole remaining name into a temporary string.
+     //
+     ns.assign (s, static_cast<string::size_type> (p - s));
+
+     s = p + 1;
+     p = strchr (s, ' ');
+
+     if (p == 0)
+     {
+       name = s;
+       prefix.clear ();
+     }
+     else
+     {
+       name.assign (s, static_cast<string::size_type> (p - s));
+       prefix = p + 1;
+     }
+   }
+ }
+
+ // Expat start element handler. Records the start_element event, the
+ // element name and position and, if receive_attributes is enabled,
+ // the attributes. It then suspends the parser so that next_() can
+ // return the event. v is the parser object that was registered as
+ // the user data.
+ //
+ void XMLCALL parser::
+ start_element_ (void* v, const XML_Char* name, const XML_Char** atts)
+ {
+ parser& p (*static_cast<parser*> (v));
+
+ XML_ParsingStatus ps;
+ XML_GetParsingStatus (p.p_, &ps);
+
+ // Expat has a (mis)-feature of possibly calling handlers even
+ // after the non-resumable XML_StopParser call.
+ //
+ if (ps.parsing == XML_FINISHED)
+ return;
+
+ // Cannot be a followup event.
+ //
+ assert (ps.parsing == XML_PARSING);
+
+ p.event_ = start_element;
+ split_name (name, p.qname_);
+
+ p.line_ = XML_GetCurrentLineNumber (p.p_);
+ p.column_ = XML_GetCurrentColumnNumber (p.p_);
+
+ // Handle attributes.
+ //
+ if ((p.feature_ & receive_attributes) != 0)
+ {
+ // atts is a null-terminated array of name/value pairs.
+ //
+ for (; *atts != 0; atts += 2)
+ {
+ p.attr_.push_back (attribute ());
+ split_name (*atts, p.attr_.back ().qname);
+ p.attr_.back ().value = *(atts + 1);
+ }
+ }
+
+ // Suspend (resumable) so that the event can be returned.
+ //
+ XML_StopParser (p.p_, true);
+ }
+
+ // Expat end element handler. If called while the parser is no longer
+ // in the parsing state (a followup event), the end_element event is
+ // queued. Otherwise the event, name and position are recorded and
+ // the parser is suspended. v is the parser object that was registered
+ // as the user data.
+ //
+ void XMLCALL parser::
+ end_element_ (void* v, const XML_Char* name)
+ {
+ parser& p (*static_cast<parser*> (v));
+
+ XML_ParsingStatus ps;
+ XML_GetParsingStatus (p.p_, &ps);
+
+ // Expat has a (mis)-feature of possibly calling handlers even
+ // after the non-resumable XML_StopParser call.
+ //
+ if (ps.parsing == XML_FINISHED)
+ return;
+
+ // This can be a followup event for empty elements (<foo/>). In this
+ // case the element name is already set.
+ //
+ if (ps.parsing != XML_PARSING)
+ p.queue_ = end_element;
+ else
+ {
+ // We may also have the end namespace declaration events which
+ // should come before the end element. If that's the case, then
+ // queue the end element and return the end namespace as the next
+ // event.
+ //
+ if (p.end_ns_i_ < p.end_ns_.size ())
+ {
+ p.event_ = end_namespace_decl;
+ p.queue_ = end_element;
+ }
+ else
+ p.event_ = end_element;
+
+ split_name (name, p.qname_);
+
+ p.line_ = XML_GetCurrentLineNumber (p.p_);
+ p.column_ = XML_GetCurrentColumnNumber (p.p_);
+
+ // Suspend (resumable) so that the event can be returned.
+ //
+ XML_StopParser (p.p_, true);
+ }
+ }
+
+ // Expat character data handler. In empty and complex content
+ // whitespaces are ignored and any other character aborts the parser
+ // (the error is then reported by handle_error()). Otherwise the data
+ // becomes the value of a characters event, or is appended to it if
+ // this is a followup call. v is the parser object that was registered
+ // as the user data; s points to n characters and is not treated as
+ // null-terminated.
+ //
+ void XMLCALL parser::
+ characters_ (void* v, const XML_Char* s, int n)
+ {
+ parser& p (*static_cast<parser*> (v));
+
+ XML_ParsingStatus ps;
+ XML_GetParsingStatus (p.p_, &ps);
+
+ // Expat has a (mis)-feature of possibly calling handlers even
+ // after the non-resumable XML_StopParser call.
+ //
+ if (ps.parsing == XML_FINISHED)
+ return;
+
+ // If this is empty or complex content, see if these are whitespaces.
+ //
+ switch (p.content ())
+ {
+ case empty:
+ case complex:
+ {
+ for (int i (0); i != n; ++i)
+ {
+ // Space, line feed, carriage return, and tab are skipped.
+ //
+ char c (s[i]);
+ if (c == 0x20 || c == 0x0A || c == 0x0D || c == 0x09)
+ continue;
+
+ // It would have been easier to throw the exception directly,
+ // however, the Expat code is most likely not exception safe.
+ //
+ p.line_ = XML_GetCurrentLineNumber (p.p_);
+ p.column_ = XML_GetCurrentColumnNumber (p.p_);
+ XML_StopParser (p.p_, false);
+ break;
+ }
+ return;
+ }
+ default:
+ break;
+ }
+
+ // This can be a followup event for another character event. In
+ // this case simply append the data.
+ //
+ if (ps.parsing != XML_PARSING)
+ {
+ assert (p.event_ == characters);
+ p.value_.append (s, n);
+ }
+ else
+ {
+ p.event_ = characters;
+ p.value_.assign (s, n);
+
+ p.line_ = XML_GetCurrentLineNumber (p.p_);
+ p.column_ = XML_GetCurrentColumnNumber (p.p_);
+
+ // Suspend (resumable) so that the event can be returned.
+ //
+ XML_StopParser (p.p_, true);
+ }
+ }
+
+ // Expat start namespace declaration handler. Appends the declaration
+ // (prefix and namespace, a null pointer being stored as an empty
+ // string) to the list that next_() later turns into events. v is the
+ // parser object that was registered as the user data.
+ //
+ void XMLCALL parser::
+ start_namespace_decl_ (void* v, const XML_Char* prefix, const XML_Char* ns)
+ {
+   parser& self (*static_cast<parser*> (v));
+
+   XML_ParsingStatus status;
+   XML_GetParsingStatus (self.p_, &status);
+
+   // Expat has a (mis)-feature of possibly calling handlers even
+   // after the non-resumable XML_StopParser call, so ignore such calls.
+   //
+   if (status.parsing != XML_FINISHED)
+   {
+     self.start_ns_.push_back (qname_type ());
+
+     qname_type& decl (self.start_ns_.back ());
+     decl.prefix () = (prefix != 0 ? prefix : "");
+     decl.namespace_ () = (ns != 0 ? ns : "");
+   }
+ }
+
+ // Expat end namespace declaration handler. Appends the prefix (a null
+ // pointer being stored as an empty string) to the list that next_()
+ // later turns into events. v is the parser object that was registered
+ // as the user data.
+ //
+ void XMLCALL parser::
+ end_namespace_decl_ (void* v, const XML_Char* prefix)
+ {
+   parser& self (*static_cast<parser*> (v));
+
+   XML_ParsingStatus status;
+   XML_GetParsingStatus (self.p_, &status);
+
+   // Expat has a (mis)-feature of possibly calling handlers even
+   // after the non-resumable XML_StopParser call, so ignore such calls.
+   //
+   if (status.parsing != XML_FINISHED)
+   {
+     self.end_ns_.push_back (qname_type ());
+
+     qname_type& decl (self.end_ns_.back ());
+     decl.prefix () = (prefix != 0 ? prefix : "");
+   }
+ }
+
+ // Human-readable event names indexed by the parser::event_type value.
+ // The order of the entries must match the order of the enumerators.
+ //
+ static const char* parser_event_str[] =
+ {
+ "start element",
+ "end element",
+ "start attribute",
+ "end attribute",
+ "characters",
+ "start namespace declaration",
+ "end namespace declaration",
+ "end of file"
+ };
+
+ // Print the human-readable name of the event.
+ //
+ ostream&
+ operator<< (ostream& os, parser::event_type e)
+ {
+   const char* text (parser_event_str[e]);
+   os << text;
+   return os;
+ }
+ }
+}
diff --git a/cutl/xml/parser.hxx b/cutl/xml/parser.hxx
new file mode 100644
index 0000000..b61d26d
--- /dev/null
+++ b/cutl/xml/parser.hxx
@@ -0,0 +1,238 @@
+// file : cutl/xml/parser.hxx
+// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC
+// license : MIT; see accompanying LICENSE file
+
+#ifndef CUTL_XML_PARSER_HXX
+#define CUTL_XML_PARSER_HXX
+
+#include <string>
+#include <vector>
+#include <iosfwd>
+#include <cstddef> // std::size_t
+
+#include <cutl/details/config.hxx> // LIBCUTL_EXTERNAL_EXPAT
+
+#ifndef LIBCUTL_EXTERNAL_EXPAT
+# include <cutl/details/expat/expat.h>
+#else
+# include <expat.h>
+#endif
+
+// We only support UTF-8 expat.
+//
+#ifdef XML_UNICODE
+# error UTF-16 expat (XML_UNICODE defined) is not supported
+#endif
+
+#include <cutl/xml/qname.hxx>
+#include <cutl/xml/exception.hxx>
+
+#include <cutl/details/export.hxx>
+
+namespace cutl
+{
+ namespace xml
+ {
+ // Exception thrown to report a parsing error. It carries the document
+ // name, the position, and the error description, both individually
+ // and as a single message returned by what().
+ //
+ struct LIBCUTL_EXPORT parsing: exception
+ {
+ virtual
+ ~parsing () throw ();
+
+ parsing (const std::string& name,
+ unsigned long long line,
+ unsigned long long column,
+ const std::string& description);
+
+ // Name of the document, as passed to the constructor.
+ //
+ const std::string&
+ name () const {return name_;}
+
+ unsigned long long
+ line () const {return line_;}
+
+ unsigned long long
+ column () const {return column_;}
+
+ // Error description without the name and position.
+ //
+ const std::string&
+ description () const {return description_;}
+
+ // Complete message in the [<name>:]<line>:<column>: error: <description>
+ // form.
+ //
+ virtual const char*
+ what () const throw ();
+
+ private:
+ std::string name_;
+ unsigned long long line_;
+ unsigned long long column_;
+ std::string description_;
+ std::string what_; // Pre-formatted message returned by what().
+ };
+
+ // Pull-style XML parser on top of Expat: each call to next() returns
+ // the next event and the accessors give the name, value, and position
+ // that go with it.
+ //
+ class LIBCUTL_EXPORT parser
+ {
+ public:
+ ~parser ();
+
+ typedef xml::qname qname_type;
+ typedef unsigned short feature_type;
+
+ // Feature flags (can be combined with bitwise OR) that select which
+ // kinds of events are reported.
+ //
+ static const feature_type receive_elements = 0x0001;
+ static const feature_type receive_characters = 0x0002;
+ static const feature_type receive_attributes = 0x0004;
+ static const feature_type receive_namespace_decls = 0x0008;
+
+ static const feature_type receive_default = receive_elements |
+ receive_characters |
+ receive_attributes;
+
+ // Parse std::istream. Name is used in diagnostics to identify the
+ // document being parsed. std::ios_base::failure exception is used
+ // to report io errors (badbit and failbit).
+ //
+ parser (std::istream&,
+ const std::string& name,
+ feature_type = receive_default);
+
+ enum event_type
+ {
+ // If adding new events, also update the stream insertion operator.
+ //
+ start_element,
+ end_element,
+ start_attribute,
+ end_attribute,
+ characters,
+ start_namespace_decl,
+ end_namespace_decl,
+ eof
+ };
+
+ // Return the next event; eof is returned once the document has been
+ // parsed. Throws the parsing exception on errors.
+ //
+ event_type
+ next ();
+
+ // Name and value that go with the current event.
+ //
+ const qname_type& qname () const {return *pqname_;}
+
+ const std::string& namespace_ () const {return pqname_->namespace_ ();}
+ const std::string& name () const {return pqname_->name ();}
+ const std::string& prefix () const {return pqname_->prefix ();}
+
+ const std::string& value () const {return *pvalue_;}
+
+ // Position that was recorded for the last element or characters
+ // event.
+ //
+ unsigned long long line () const {return line_;}
+ unsigned long long column () const {return column_;}
+
+ // Optional content processing.
+ //
+ public:
+ enum content_type
+ {
+ // element characters whitespaces
+ empty, // no no ignored
+ simple, // no yes preserved
+ complex, // yes no ignored
+ mixed // yes yes preserved
+ };
+
+ // Set the content model for the current depth, replacing the one set
+ // earlier for this depth, if any.
+ //
+ void
+ content (content_type c)
+ {
+ if (!content_.empty () && content_.back ().depth == depth_)
+ content_.back ().content = c;
+ else
+ content_.push_back (content_entry (depth_, c));
+ }
+
+ // Return the content model set for the current depth or mixed if
+ // none was set.
+ //
+ content_type
+ content () const
+ {
+ return !content_.empty () && content_.back ().depth == depth_
+ ? content_.back ().content : mixed;
+ }
+
+ // Expat handlers. The first argument is the user data, which is the
+ // parser object.
+ //
+ private:
+ static void XMLCALL
+ start_element_ (void*, const XML_Char*, const XML_Char**);
+
+ static void XMLCALL
+ end_element_ (void*, const XML_Char*);
+
+ static void XMLCALL
+ characters_ (void*, const XML_Char*, int);
+
+ static void XMLCALL
+ start_namespace_decl_ (void*, const XML_Char*, const XML_Char*);
+
+ static void XMLCALL
+ end_namespace_decl_ (void*, const XML_Char*);
+
+ private:
+ // Return the next event without the content processing done by
+ // next().
+ //
+ event_type
+ next_ ();
+
+ // Throw the parsing exception for the current Expat error.
+ //
+ void
+ handle_error ();
+
+ private:
+ std::istream& is_;
+ const std::string name_;
+ feature_type feature_;
+
+ XML_Parser p_;
+ std::size_t depth_; // Number of currently open elements.
+ event_type event_; // Current (last produced) event.
+ event_type queue_; // Event to return next or eof if none.
+
+ qname_type qname_;
+ std::string value_;
+
+ // These are used to avoid copying when we are handling attributes
+ // and namespace decls.
+ //
+ const qname_type* pqname_;
+ const std::string* pvalue_;
+
+ unsigned long long line_;
+ unsigned long long column_;
+
+ // Attributes.
+ //
+ struct attribute
+ {
+ qname_type qname;
+ std::string value;
+ };
+
+ typedef std::vector<attribute> attributes;
+
+ attributes attr_;
+ attributes::size_type attr_i_; // Index of the current attribute.
+
+ // Namespace declarations.
+ //
+ typedef std::vector<qname_type> namespace_decls;
+
+ namespace_decls start_ns_;
+ namespace_decls::size_type start_ns_i_; // Index of the current decl.
+
+ namespace_decls end_ns_;
+ namespace_decls::size_type end_ns_i_; // Index of the current decl.
+
+ // Content. Each entry is the content model set for the given depth;
+ // the entry for the innermost depth is at the back.
+ //
+ struct content_entry
+ {
+ content_entry (std::size_t d, content_type c)
+ : depth (d), content (c) {}
+
+ std::size_t depth;
+ content_type content;
+ };
+
+ std::vector<content_entry> content_;
+ };
+
+ // Print the human-readable name of the event (for example,
+ // "start element").
+ //
+ LIBCUTL_EXPORT
+ std::ostream&
+ operator<< (std::ostream&, parser::event_type);
+ }
+}
+
+#endif // CUTL_XML_PARSER_HXX
diff --git a/cutl/xml/qname.cxx b/cutl/xml/qname.cxx
new file mode 100644
index 0000000..ce7cca1
--- /dev/null
+++ b/cutl/xml/qname.cxx
@@ -0,0 +1,22 @@
+// file : cutl/xml/qname.cxx
+// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC
+// license : MIT; see accompanying LICENSE file
+
+#include <ostream>
+
+#include <cutl/xml/qname.hxx>
+
+using namespace std;
+
+namespace cutl
+{
+ namespace xml
+ {
+ // Print the name as <namespace>#<name> or as just <name> if the
+ // namespace is empty. The prefix is not printed.
+ //
+ ostream&
+ operator<< (ostream& os, const qname& qn)
+ {
+   const string& ns (qn.namespace_ ());
+   const char* sep (ns.empty () ? "" : "#");
+
+   os << ns;
+   os << sep;
+   os << qn.name ();
+
+   return os;
+ }
+ }
+}
diff --git a/cutl/xml/qname.hxx b/cutl/xml/qname.hxx
new file mode 100644
index 0000000..ab1c8ea
--- /dev/null
+++ b/cutl/xml/qname.hxx
@@ -0,0 +1,74 @@
+// file : cutl/xml/qname.hxx
+// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC
+// license : MIT; see accompanying LICENSE file
+
+#ifndef CUTL_XML_QNAME_HXX
+#define CUTL_XML_QNAME_HXX
+
+#include <string>
+#include <iosfwd>
+
+#include <cutl/details/export.hxx>
+
+namespace cutl
+{
+ namespace xml
+ {
+ // Note that the optional prefix is just a "syntactic sugar". In
+ // particular, it is ignored by the comparison operators and the
+ // std::ostream insertion operator.
+ //
+ // Qualified XML name: namespace, local name, and optional prefix.
+ //
+ class LIBCUTL_EXPORT qname
+ {
+ public:
+ // Components that are not specified are left empty.
+ //
+ qname () {}
+ qname (const std::string& name): name_ (name) {}
+ qname (const std::string& ns, const std::string& name)
+ : ns_ (ns), name_ (name) {}
+ qname (const std::string& ns,
+ const std::string& name,
+ const std::string& prefix)
+ : ns_ (ns), name_ (name), prefix_ (prefix) {}
+
+ const std::string& namespace_ () const {return ns_;}
+ const std::string& name () const {return name_;}
+ const std::string& prefix () const {return prefix_;}
+
+ // Modifiable access to the components.
+ //
+ std::string& namespace_ () {return ns_;}
+ std::string& name () {return name_;}
+ std::string& prefix () {return prefix_;}
+
+ // Note that comparison operators only consider the namespace and the
+ // name; the prefix is ignored. Names are ordered by namespace first
+ // and then by name.
+ //
+ public:
+ friend bool
+ operator< (const qname& x, const qname& y)
+ {
+ return x.ns_ < y.ns_ || (x.ns_ == y.ns_ && x.name_ < y.name_);
+ }
+
+ friend bool
+ operator== (const qname& x, const qname& y)
+ {
+ return x.ns_ == y.ns_ && x.name_ == y.name_;
+ }
+
+ friend bool
+ operator!= (const qname& x, const qname& y)
+ {
+ return !(x == y);
+ }
+
+ private:
+ std::string ns_;
+ std::string name_;
+ std::string prefix_;
+ };
+
+ // Print the name as <namespace>#<name> or as just <name> if the
+ // namespace is empty.
+ //
+ LIBCUTL_EXPORT
+ std::ostream&
+ operator<< (std::ostream&, const qname&);
+ }
+}
+
+#endif // CUTL_XML_QNAME_HXX
diff --git a/cutl/xml/serializer.cxx b/cutl/xml/serializer.cxx
new file mode 100644
index 0000000..66c78a4
--- /dev/null
+++ b/cutl/xml/serializer.cxx
@@ -0,0 +1,245 @@
+// file : cutl/xml/serializer.cxx
+// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC
+// license : MIT; see accompanying LICENSE file
+
+#include <new> // std::bad_alloc
+#include <cstring> // std::strlen
+
+#include <cutl/xml/serializer.hxx>
+
+using namespace std;
+
+namespace cutl
+{
+ namespace xml
+ {
+ // serialization
+ //
+ serialization::
+ ~serialization () throw ()
+ {
+ }
+
+ // Store the individual components and pre-format the text returned
+ // by what() as [<name>: ]error: <description>.
+ //
+ serialization::
+ serialization (const string& n, const string& d)
+   : name_ (n), description_ (d)
+ {
+   // The document name (and its separator) is omitted if it is empty.
+   //
+   if (n.size () != 0)
+     what_.append (n).append (": ");
+
+   what_.append ("error: ").append (d);
+ }
+
+ // Return the message formatted by the constructor.
+ //
+ const char* serialization::
+ what () const throw ()
+ {
+   return what_.c_str ();
+ }
+
+ // serializer
+ //
+ // Genx sender callback: write the null-terminated string us to the
+ // std::ostream passed as p. A stream failure is reported via the
+ // return status.
+ //
+ extern "C" genxStatus
+ genx_write (void* p, constUtf8 us)
+ {
+   // It would have been easier to throw the exception directly,
+   // however, the Genx code is most likely not exception safe.
+   //
+   ostream& out (*static_cast<ostream*> (p));
+
+   const char* text (reinterpret_cast<const char*> (us));
+   const streamsize len (static_cast<streamsize> (strlen (text)));
+   out.write (text, len);
+
+   if (!out.good ())
+     return GENX_IO_ERROR;
+
+   return GENX_SUCCESS;
+ }
+
+ // Genx sender callback: write the characters in the [start, end)
+ // range to the std::ostream passed as p. A stream failure is reported
+ // via the return status.
+ //
+ extern "C" genxStatus
+ genx_write_bound (void* p, constUtf8 start, constUtf8 end)
+ {
+   ostream& out (*static_cast<ostream*> (p));
+
+   const streamsize len (static_cast<streamsize> (end - start));
+   out.write (reinterpret_cast<const char*> (start), len);
+
+   if (!out.good ())
+     return GENX_IO_ERROR;
+
+   return GENX_SUCCESS;
+ }
+
+ // Genx sender callback: flush the std::ostream passed as p. A stream
+ // failure is reported via the return status.
+ //
+ extern "C" genxStatus
+ genx_flush (void* p)
+ {
+   ostream& out (*static_cast<ostream*> (p));
+   out.flush ();
+
+   if (!out.good ())
+     return GENX_IO_ERROR;
+
+   return GENX_SUCCESS;
+ }
+
+ // Dispose of the underlying Genx writer, if there is one.
+ //
+ serializer::
+ ~serializer ()
+ {
+   if (s_ == 0)
+     return;
+
+   genxDispose (s_);
+ }
+
+ // Create a serializer that writes to os. The name is kept for use in
+ // diagnostics and ind is the number of indentation spaces (0 disables
+ // pretty-printing). The stream's exception mask is saved and cleared
+ // for the duration of the serialization.
+ //
+ // Throws std::bad_alloc if the Genx writer cannot be allocated and
+ // serialization if the document cannot be started. In both cases the
+ // stream is left with its original exception mask.
+ //
+ serializer::
+ serializer (ostream& os, const string& name, unsigned short ind)
+   : os_ (os), os_state_ (os.exceptions ()), name_ (name), depth_ (0)
+ {
+   // Allocate the serializer. Make sure nothing else can throw after
+   // this call since otherwise we will leak it. This is done before
+   // touching the stream so that an allocation failure leaves the
+   // stream's exception mask intact.
+   //
+   s_ = genxNew (0, 0, 0);
+
+   if (s_ == 0)
+     throw bad_alloc ();
+
+   // Temporarily disable exceptions on the stream. Setting an empty
+   // mask cannot throw.
+   //
+   os_.exceptions (ostream::goodbit);
+
+   genxSetUserData (s_, &os_);
+
+   if (ind != 0)
+     genxSetPrettyPrint (s_, ind);
+
+   sender_.send = &genx_write;
+   sender_.sendBounded = &genx_write_bound;
+   sender_.flush = &genx_flush;
+
+   if (genxStatus e = genxStartDocSender (s_, &sender_))
+   {
+     string m (genxGetErrorMessage (s_, e));
+     genxDispose (s_);
+
+     // The destructor will not run, so give the stream its original
+     // exception mask back. As in handle_error(), this may itself
+     // throw std::ios_base::failure if the stream is in a failed state.
+     //
+     os_.exceptions (os_state_);
+
+     throw serialization (name, m);
+   }
+ }
+
+ // Translate the Genx error status into an exception. This function
+ // always throws: std::bad_alloc for allocation failures and
+ // serialization for everything else, except that for io errors
+ // restoring the stream's exception mask may throw first.
+ //
+ void serializer::
+ handle_error (genxStatus e)
+ {
+ switch (e)
+ {
+ case GENX_ALLOC_FAILED:
+ throw bad_alloc ();
+ case GENX_IO_ERROR:
+ // Restoring the original exception state should trigger the
+ // exception. If it doesn't (e.g., because the user didn't
+ // configure the stream to throw), then fall back to the
+ // serialization exception.
+ //
+ os_.exceptions (os_state_);
+ // Fall through.
+ default:
+ throw serialization (name_, genxGetErrorMessage (s_, e));
+ }
+ }
+
+ // Start an element with the given namespace and name and increment
+ // the element depth.
+ //
+ void serializer::
+ start_element (const string& ns, const string& name)
+ {
+   // An empty namespace is passed to Genx as a null pointer.
+   //
+   const char* n (ns.empty () ? 0 : ns.c_str ());
+
+   genxStatus e (
+     genxStartElementLiteral (
+       s_,
+       reinterpret_cast<constUtf8> (n),
+       reinterpret_cast<constUtf8> (name.c_str ())));
+
+   if (e)
+     handle_error (e);
+
+   ++depth_;
+ }
+
+ // End the current element. Ending the root element also ends the
+ // document and restores the original exception state on the stream.
+ //
+ void serializer::
+ end_element ()
+ {
+   genxStatus e (genxEndElement (s_));
+
+   if (e)
+     handle_error (e);
+
+   // Nothing else to do unless we are past the root element.
+   //
+   if (--depth_ != 0)
+     return;
+
+   e = genxEndDocument (s_);
+
+   if (e)
+     handle_error (e);
+
+   os_.exceptions (os_state_);
+ }
+
+ // Start an attribute with the given namespace and name.
+ //
+ void serializer::
+ start_attribute (const string& ns, const string& name)
+ {
+   // An empty namespace is passed to Genx as a null pointer.
+   //
+   const char* n (ns.empty () ? 0 : ns.c_str ());
+
+   genxStatus e (
+     genxStartAttributeLiteral (
+       s_,
+       reinterpret_cast<constUtf8> (n),
+       reinterpret_cast<constUtf8> (name.c_str ())));
+
+   if (e)
+     handle_error (e);
+ }
+
+ // End the attribute started with start_attribute().
+ //
+ void serializer::
+ end_attribute ()
+ {
+   genxStatus e (genxEndAttribute (s_));
+
+   if (e)
+     handle_error (e);
+ }
+
+ // Add a complete attribute (name and value) in one call.
+ //
+ void serializer::
+ attribute (const string& ns,
+            const string& name,
+            const string& value)
+ {
+   // An empty namespace is passed to Genx as a null pointer.
+   //
+   const char* n (ns.empty () ? 0 : ns.c_str ());
+
+   genxStatus e (
+     genxAddAttributeLiteral (
+       s_,
+       reinterpret_cast<constUtf8> (n),
+       reinterpret_cast<constUtf8> (name.c_str ()),
+       reinterpret_cast<constUtf8> (value.c_str ())));
+
+   if (e)
+     handle_error (e);
+ }
+
+ // Add character data. The size of the string is passed explicitly.
+ //
+ void serializer::
+ characters (const string& value)
+ {
+   constUtf8 text (reinterpret_cast<constUtf8> (value.c_str ()));
+   const int len (static_cast<int> (value.size ()));
+
+   genxStatus e (genxAddCountedText (s_, text, len));
+
+   if (e)
+     handle_error (e);
+ }
+
+ // Declare the namespace with the given prefix. If both are empty,
+ // then the default namespace is unset instead.
+ //
+ void serializer::
+ namespace_decl (const string& ns, const string& p)
+ {
+   genxStatus e;
+
+   if (ns.empty () && p.empty ())
+     e = genxUnsetDefaultNamespace (s_);
+   else
+     e = genxAddNamespaceLiteral (
+       s_,
+       reinterpret_cast<constUtf8> (ns.c_str ()),
+       reinterpret_cast<constUtf8> (p.c_str ()));
+
+   if (e)
+     handle_error (e);
+ }
+
+ // Write the XML declaration. An empty encoding or standalone value is
+ // passed to Genx as a null pointer.
+ //
+ void serializer::
+ xml_decl (const string& ver, const string& enc, const string& stl)
+ {
+   constUtf8 v (reinterpret_cast<constUtf8> (ver.c_str ()));
+   constUtf8 e_ptr (0);
+   constUtf8 s_ptr (0);
+
+   if (!enc.empty ())
+     e_ptr = reinterpret_cast<constUtf8> (enc.c_str ());
+
+   if (!stl.empty ())
+     s_ptr = reinterpret_cast<constUtf8> (stl.c_str ());
+
+   genxStatus e (genxXmlDeclaration (s_, v, e_ptr, s_ptr));
+
+   if (e)
+     handle_error (e);
+ }
+
+ // Store the prefix mapped to the namespace ns in p. This
+ // implementation always returns true.
+ //
+ bool serializer::
+ lookup_namespace_prefix (const string& ns, string& p)
+ {
+ // Currently Genx will create a namespace mapping if one doesn't
+ // already exist.
+ //
+ genxStatus e;
+ genxNamespace gns (
+ genxDeclareNamespace (
+ s_, reinterpret_cast<constUtf8> (ns.c_str ()), 0, &e));
+
+ if (e != GENX_SUCCESS)
+ handle_error (e);
+
+ // NOTE(review): assumes genxGetNamespacePrefix() never returns a null
+ // pointer for a namespace that was declared successfully -- confirm
+ // against the Genx implementation.
+ //
+ p = reinterpret_cast<const char*> (genxGetNamespacePrefix (gns));
+ return true;
+ }
+ }
+}
diff --git a/cutl/xml/serializer.hxx b/cutl/xml/serializer.hxx
new file mode 100644
index 0000000..e3fef54
--- /dev/null
+++ b/cutl/xml/serializer.hxx
@@ -0,0 +1,154 @@
+// file : cutl/xml/serializer.hxx
+// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC
+// license : MIT; see accompanying LICENSE file
+
+#ifndef CUTL_XML_SERIALIZER_HXX
+#define CUTL_XML_SERIALIZER_HXX
+
+#include <string>
+#include <ostream>
+#include <cstddef> // std::size_t
+
+#include <cutl/details/genx/genx.h>
+
+#include <cutl/xml/qname.hxx>
+#include <cutl/xml/exception.hxx>
+
+#include <cutl/details/export.hxx>
+
+namespace cutl
+{
+ namespace xml
+ {
+ // Exception thrown to report a serialization error. It carries the
+ // document name and the error description, both individually and as
+ // a single message returned by what().
+ //
+ struct LIBCUTL_EXPORT serialization: exception
+ {
+ virtual
+ ~serialization () throw ();
+
+ serialization (const std::string& name,
+ const std::string& description);
+
+ // Name of the document, as passed to the constructor.
+ //
+ const std::string&
+ name () const {return name_;}
+
+ // Error description without the name.
+ //
+ const std::string&
+ description () const {return description_;}
+
+ // Complete message in the [<name>: ]error: <description> form.
+ //
+ virtual const char*
+ what () const throw ();
+
+ private:
+ std::string name_;
+ std::string description_;
+ std::string what_; // Pre-formatted message returned by what().
+ };
+
+ // XML serializer on top of Genx that writes to std::ostream.
+ //
+ class LIBCUTL_EXPORT serializer
+ {
+ public:
+ ~serializer ();
+
+ typedef xml::qname qname_type;
+
+ // Serialize to std::ostream. Name is used in diagnostics to identify
+ // the document being serialized. std::ios_base::failure exception is
+ // used to report io errors (badbit and failbit). The indentation
+ // argument specifies the number of indentation spaces that should
+ // be used for pretty-printing. If 0 is passed, no pretty-printing
+ // is performed.
+ //
+ serializer (std::ostream&,
+ const std::string& name,
+ unsigned short indentation = 2);
+
+ // Serialization functions.
+ //
+ public:
+
+ // Elements. The overloads without the namespace argument use an
+ // empty namespace. Ending the root element also ends the document.
+ //
+ void
+ start_element (const qname_type& qname);
+
+ void
+ start_element (const std::string& name);
+
+ void
+ start_element (const std::string& ns, const std::string& name);
+
+ void
+ end_element ();
+
+ // Attributes. The overloads without the namespace argument use an
+ // empty namespace.
+ //
+ void
+ start_attribute (const qname_type& qname);
+
+ void
+ start_attribute (const std::string& name);
+
+ void
+ start_attribute (const std::string& ns, const std::string& name);
+
+ void
+ end_attribute ();
+
+ void
+ attribute (const qname_type& qname, const std::string& value);
+
+ void
+ attribute (const std::string& name, const std::string& value);
+
+ void
+ attribute (const std::string& ns,
+ const std::string& name,
+ const std::string& value);
+
+ // Characters.
+ //
+ void
+ characters (const std::string& value);
+
+ // Namespaces declaration. If prefix is empty, then the default
+ // namespace is declared. If both prefix and namespace are empty,
+ // then the default namespace declaration is cleared (xmlns="").
+ //
+ void
+ namespace_decl (const std::string& ns, const std::string& prefix);
+
+ // XML Declaration. If encoding or standalone are not specified,
+ // then these attributes are omitted from the output.
+ //
+ void
+ xml_decl (const std::string& version = "1.0",
+ const std::string& encoding = "UTF-8",
+ const std::string& standalone = "");
+
+ // Other functions.
+ //
+ public:
+ // Return true if there is a mapping. In this case, prefix contains
+ // the mapped prefix. Note that the current implementation creates
+ // the mapping if one does not exist and therefore always returns
+ // true.
+ //
+ bool
+ lookup_namespace_prefix (const std::string& ns, std::string& prefix);
+
+ private:
+ // Throw the exception that corresponds to the Genx error status.
+ //
+ void
+ handle_error (genxStatus);
+
+ private:
+ std::ostream& os_;
+ std::ostream::iostate os_state_; // Original exception state.
+ const std::string name_;
+
+ genxWriter s_;
+ genxSender sender_;
+ std::size_t depth_; // Number of currently open elements.
+ };
+ }
+}
+
+#include <cutl/xml/serializer.ixx>
+
+#endif // CUTL_XML_SERIALIZER_HXX
diff --git a/cutl/xml/serializer.ixx b/cutl/xml/serializer.ixx
new file mode 100644
index 0000000..115b77b
--- /dev/null
+++ b/cutl/xml/serializer.ixx
@@ -0,0 +1,45 @@
+// file : cutl/xml/serializer.ixx
+// copyright : Copyright (c) 2009-2013 Code Synthesis Tools CC
+// license : MIT; see accompanying LICENSE file
+
+namespace cutl
+{
+ namespace xml
+ {
+ // Convenience overloads. They all forward to the corresponding
+ // function that takes the namespace and the name as separate strings:
+ // the qname versions pass the qname's namespace and name (the prefix
+ // is not used) and the name-only versions pass an empty namespace.
+ //
+ inline void serializer::
+ start_element (const qname_type& qname)
+ {
+   const std::string& ns (qname.namespace_ ());
+   const std::string& n (qname.name ());
+   start_element (ns, n);
+ }
+
+ inline void serializer::
+ start_element (const std::string& name)
+ {
+   const std::string no_ns;
+   start_element (no_ns, name);
+ }
+
+ inline void serializer::
+ start_attribute (const qname_type& qname)
+ {
+   const std::string& ns (qname.namespace_ ());
+   const std::string& n (qname.name ());
+   start_attribute (ns, n);
+ }
+
+ inline void serializer::
+ start_attribute (const std::string& name)
+ {
+   const std::string no_ns;
+   start_attribute (no_ns, name);
+ }
+
+ inline void serializer::
+ attribute (const qname_type& qname, const std::string& value)
+ {
+   const std::string& ns (qname.namespace_ ());
+   const std::string& n (qname.name ());
+   attribute (ns, n, value);
+ }
+
+ inline void serializer::
+ attribute (const std::string& name, const std::string& value)
+ {
+   const std::string no_ns;
+   attribute (no_ns, name, value);
+ }
+ }
+}