From 884dea7531962b17ef843ac2175faa050e8b0758 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Thu, 10 Apr 2014 12:57:06 +0200 Subject: Add support for ordered types, mixed content --- doc/cxx/tree/manual/index.xhtml | 665 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 638 insertions(+), 27 deletions(-) (limited to 'doc/cxx/tree/manual/index.xhtml') diff --git a/doc/cxx/tree/manual/index.xhtml b/doc/cxx/tree/manual/index.xhtml index 85a5f83..052f2b3 100644 --- a/doc/cxx/tree/manual/index.xhtml +++ b/doc/cxx/tree/manual/index.xhtml @@ -298,6 +298,7 @@ 2.8.1Mapping for Members with the One Cardinality Class 2.8.2Mapping for Members with the Optional Cardinality Class 2.8.3Mapping for Members with the Sequence Cardinality Class + 2.8.4Element Order @@ -321,7 +322,8 @@ 2.12.1Mapping for any with the One Cardinality Class 2.12.2Mapping for any with the Optional Cardinality Class 2.12.3Mapping for any with the Sequence Cardinality Class - 2.12.4Mapping for anyAttribute + 2.12.4Element Wildcard Order + 2.12.5Mapping for anyAttribute @@ -2237,7 +2239,7 @@ public:

is mapped to:

-class color: xml_schema::string
+class color: public xml_schema::string
 {
 public:
   enum value
@@ -2423,7 +2425,7 @@ public:
   

is mapped to:

-class complex: xml_schema::type
+class complex: public xml_schema::type
 {
 public:
   object (const int& a, const xml_schema::string& b);
@@ -2441,7 +2443,7 @@ public:
 
 };
 
-class object: xml_schema::type
+class object: public xml_schema::type
 {
 public:
   object (const bool& s_one, const complex& c_one);
@@ -2484,7 +2486,7 @@ public:
   

is mapped to:

-class object: xml_schema::string
+class object: public xml_schema::string
 {
 public:
   object (const xml_schema::language& lang);
@@ -2529,7 +2531,7 @@ public:
   

is mapped to:

-class color: xml_schema::string
+class color: public xml_schema::string
 {
 public:
   enum value
@@ -2666,7 +2668,7 @@ public:
   

is mapped to:

-class object: xml_schema::type
+class object: public xml_schema::type
 {
 public:
   typedef xml_schema::string member_type;
@@ -2692,7 +2694,7 @@ public:
   

is mapped to:

-class object: xml_schema::type
+class object: public xml_schema::type
 {
 public:
   typedef xml_schema::string data_type;
@@ -2749,7 +2751,7 @@ public:
   

is mapped to:

-class object: xml_schema::type
+class object: public xml_schema::type
 {
 public:
   // Type definitions.
@@ -2782,7 +2784,7 @@ public:
       member's type, for example:

-class object: xml_schema::type
+class object: public xml_schema::type
 {
 public:
   ...
@@ -2873,7 +2875,7 @@ f (object& o)
   

is mapped to:

-class object: xml_schema::type
+class object: public xml_schema::type
 {
 public:
   // Type definitions.
@@ -3102,7 +3104,7 @@ f (object& o)
   

is mapped to:

-class object: xml_schema::type
+class object: public xml_schema::type
 {
 public:
   // Type definitions.
@@ -3221,6 +3223,415 @@ f (object& o)
 }
   
+

2.8.4 Element Order

+ +

C++/Tree is a "flattening" mapping in the sense that many levels of + nested compositors (choice and sequence), + all potentially with their own cardinalities, are in the end mapped + to a flat set of elements with one of the three cardinality classes + discussed in the previous sections. While this results in a simple + and easy to use API for most types, in certain cases, the order of + elements in the actual XML documents is not preserved once parsed + into the object model. And sometimes such order has + application-specific significance. As an example, consider a schema + that defines a batch of bank transactions:

+ +
+<complexType name="withdraw">
+  <sequence>
+    <element name="account" type="unsignedInt"/>
+    <element name="amount" type="unsignedInt"/>
+  </sequence>
+</complexType>
+
+<complexType name="deposit">
+  <sequence>
+    <element name="account" type="unsignedInt"/>
+    <element name="amount" type="unsignedInt"/>
+  </sequence>
+</complexType>
+
+<complexType name="batch">
+  <choice minOccurs="0" maxOccurs="unbounded">
+    <element name="withdraw" type="withdraw"/>
+    <element name="deposit" type="deposit"/>
+  </choice>
+</complexType>
+  
+ +

The batch can contain any number of transactions in any order + but the order of transactions in each actual batch is significant. + For instance, consider what could happen if we reorder the + transactions and apply all the withdrawals before deposits.

+ +

For the batch schema type defined above the default + C++/Tree mapping will produce a C++ class that contains a pair of + sequence containers, one for each of the two elements. While this + will capture the content (transactions), the order of this content + as it appears in XML will be lost. Also, if we try to serialize the + batch we just loaded back to XML, all the withdrawal transactions + will appear before deposits.

+ +

To overcome this limitation of a flattening mapping, C++/Tree + allows us to mark certain XML Schema types, for which content + order is important, as ordered.

+ +

There are several command line options that control which + schema types are treated as ordered. To make an individual + type ordered, we use the --ordered-type option, + for example:

+ +
+--ordered-type batch
+  
+ +

To automatically treat all the types that are derived from an ordered + type as ordered as well, we use the --ordered-type-derived + option. This is primarily useful if you would like to iterate + over the complete hierarchy's content using the content order + sequence (discussed below).

+ +

Ordered types are also useful for handling mixed content. To + automatically mark all the types with mixed content as ordered + we use the --ordered-type-mixed option. For more + information on handling mixed content see Section + 2.13, "Mapping for Mixed Content Models".

+ +

Finally, we can mark all the types in the schema we are + compiling with the --ordered-type-all option. + You should only resort to this option if all the types in + your schema truly suffer from the loss of content + order since, as we will discuss shortly, ordered types + require extra effort to access and, especially, modify. + See the + XSD + Compiler Command Line Manual for more information on + these options.

+ +

Once a type is marked ordered, C++/Tree alters its mapping + in several ways. Firstly, for each local element, element + wildcard (Section 2.12.4, "Element Wildcard + Order"), and mixed content text (Section + 2.13, "Mapping for Mixed Content Models") in this type, a + content id constant is generated. Secondly, an additional sequence + is added to the class that captures the content order. Here + is how the mapping of our batch class changes + once we make it ordered:

+ +
+class batch: public xml_schema::type
+{
+public:
+  // withdraw
+  //
+  typedef withdraw withdraw_type;
+  typedef sequence<withdraw_type> withdraw_sequence;
+  typedef withdraw_sequence::iterator withdraw_iterator;
+  typedef withdraw_sequence::const_iterator withdraw_const_iterator;
+
+  static const std::size_t withdraw_id = 1;
+
+  const withdraw_sequence&
+  withdraw () const;
+
+  withdraw_sequence&
+  withdraw ();
+
+  void
+  withdraw (const withdraw_sequence&);
+
+  // deposit
+  //
+  typedef deposit deposit_type;
+  typedef sequence<deposit_type> deposit_sequence;
+  typedef deposit_sequence::iterator deposit_iterator;
+  typedef deposit_sequence::const_iterator deposit_const_iterator;
+
+  static const std::size_t deposit_id = 2;
+
+  const deposit_sequence&
+  deposit () const;
+
+  deposit_sequence&
+  deposit ();
+
+  void
+  deposit (const deposit_sequence&);
+
+  // content_order
+  //
+  typedef xml_schema::content_order content_order_type;
+  typedef std::vector<content_order_type> content_order_sequence;
+  typedef content_order_sequence::iterator content_order_iterator;
+  typedef content_order_sequence::const_iterator content_order_const_iterator;
+
+  const content_order_sequence&
+  content_order () const;
+
+  content_order_sequence&
+  content_order ();
+
+  void
+  content_order (const content_order_sequence&);
+
+  ...
+};
+  
+ +

Notice the withdraw_id and deposit_id + content ids as well as the extra content_order + sequence that does not correspond to any element in the + schema definition. The other changes to the mapping for ordered + types have to do with XML parsing and serialization code. During + parsing the content order is captured in the content_order + sequence while during serialization this sequence is used to + determine the order in which content is serialized. The + content_order sequence is also copied during + copy construction and assigned during copy assignment. It is also + taken into account during comparison.

+ +

The entry type of the content_order sequence is the + xml_schema::content_order type that has the following + interface:

+ +
+namespace xml_schema
+{
+  struct content_order
+  {
+    content_order (std::size_t id, std::size_t index = 0);
+
+    std::size_t id;
+    std::size_t index;
+  };
+
+  bool
+  operator== (const content_order&, const content_order&);
+
+  bool
+  operator!= (const content_order&, const content_order&);
+
+  bool
+  operator< (const content_order&, const content_order&);
+}
+  
+ +

The content_order sequence describes the order of + content (elements, including wildcards, as well as mixed content + text). Each entry in this sequence consists of the content id + (for example, withdraw_id or deposit_id + in our case) as well as, for elements of the sequence cardinality + class, an index into the corresponding sequence container (the + index is unused for the one and optional cardinality classes). + For example, in our case, if the content id is withdraw_id, + then the index will point into the withdraw element + sequence.

+ +

With all this information we can now examine how to iterate over + transactions in the batch in content order:

+ +
+batch& b = ...
+
+for (batch::content_order_const_iterator i (b.content_order ().begin ());
+     i != b.content_order ().end ();
+     ++i)
+{
+  switch (i->id)
+  {
+  case batch::withdraw_id:
+    {
+      const withdraw& t (b.withdraw ()[i->index]);
+      cerr << t.account () << " withdraw " << t.amount () << endl;
+      break;
+    }
+  case batch::deposit_id:
+    {
+      const deposit& t (b.deposit ()[i->index]);
+      cerr << t.account () << " deposit " << t.amount () << endl;
+      break;
+    }
+  default:
+    {
+      assert (false); // Unknown content id.
+    }
+  }
+}
+  
+ +

If we serialized our batch back to XML, we would also see that the + order of transactions in the output is exactly the same as in the + input rather than all the withdrawals first followed by all the + deposits.

+ +

The most complex aspect of working with ordered types is + modifications. Now we not only need to change the content, + but also remember to update the order information corresponding + to this change. As a first example, we add a deposit transaction + to the batch:

+ +
+using xml_schema::content_order;
+
+batch::deposit_sequence& d (b.deposit ());
+batch::withdraw_sequence& w (b.withdraw ());
+batch::content_order_sequence& co (b.content_order ());
+
+d.push_back (deposit (123456789, 100000));
+co.push_back (content_order (batch::deposit_id, d.size () - 1));
+  
+ +

In the above example we first added the content (deposit + transaction) and then updated the content order information + by adding an entry with deposit_id content + id and the index of the just added deposit transaction.

+ +

Removing the last transaction can be easy if we know which + transaction (deposit or withdrawal) is last:

+ +
+d.pop_back ();
+co.pop_back ();
+  
+ +

If, however, we do not know which transaction is last, then + things get a bit more complicated:

+ +
+switch (co.back ().id)
+{
+case batch::withdraw_id:
+  {
+    w.pop_back ();
+    break;
+  }
+case batch::deposit_id:
+  {
+    d.pop_back ();
+    break;
+  }
+}
+
+co.pop_back ();
+  
+ +

The following example shows how to add a transaction at the + beginning of the batch:

+ +
+w.push_back (withdraw (123456789, 100000));
+co.insert (co.begin (),
+           content_order (batch::withdraw_id, w.size () - 1));
+  
+ +

Note also that when we merely modify the content of one + of the elements in place, we do not need to update its + order since it doesn't change. For example, here is how + we can change the amount in the first withdrawal:

+ +
+w[0].amount (10000);
+  
+ +

For the complete working code shown in this section refer to the + order/element example in the + examples/cxx/tree/ directory in the XSD distribution.

+ +

If both the base and derived types are ordered, then the + content order sequence is only added to the base and the content + ids are unique within the whole hierarchy. In this case + the content order sequence for the derived type contains + ordering information for both base and derived content.

+ +

In some applications we may need to perform more complex + content processing. For example, in our case, we may need + to remove all the withdrawal transactions. The default + container, std::vector, is not particularly + suitable for such operations. What may be required by + some applications is a multi-index container that not + only allows us to iterate in content order similar to + std::vector but also search by the content + id as well as the content id and index pair.

+ +

While C++/Tree does not provide this functionality by + default, it allows us to specify a custom container + type for content order with the --order-container + command line option. The only requirement from the + generated code side for such a container is to provide + the vector-like push_back(), + size(), and const iteration interfaces.

+ +

As an example, here is how we can use the Boost Multi-Index + container for content order. First we create the + content-order-container.hxx header with the + following definition (in C++11, use the alias template + instead):

+ +
+#ifndef CONTENT_ORDER_CONTAINER
+#define CONTENT_ORDER_CONTAINER
+
+#include <cstddef> // std::size_t
+
+#include <boost/multi_index_container.hpp>
+#include <boost/multi_index/member.hpp>
+#include <boost/multi_index/identity.hpp>
+#include <boost/multi_index/ordered_index.hpp>
+#include <boost/multi_index/random_access_index.hpp>
+
+struct by_id {};
+struct by_id_index {};
+
+template <typename T>
+struct content_order_container:
+  boost::multi_index::multi_index_container<
+    T,
+    boost::multi_index::indexed_by<
+      boost::multi_index::random_access<>,
+      boost::multi_index::ordered_unique<
+        boost::multi_index::tag<by_id_index>,
+        boost::multi_index::identity<T>
+      >,
+      boost::multi_index::ordered_non_unique<
+        boost::multi_index::tag<by_id>,
+        boost::multi_index::member<T, std::size_t, &T::id>
+      >
+    >
+  >
+{};
+
+#endif
+  
+ +

Next we add the following two XSD compiler options to include + this header into every generated header file and to use the + custom container type (see the XSD compiler command line manual + for more information on shell quoting for the first option):

+ +
+--hxx-prologue '#include "content-order-container.hxx"'
+--order-container content_order_container
+  
+ +

With these changes we can now use the multi-index functionality, + for example, to search for a specific content id:

+ +
+typedef batch::content_order_sequence::index<by_id>::type id_set;
+typedef id_set::iterator id_iterator;
+
+const id_set& ids (b.content_order ().get<by_id> ());
+
+std::pair<id_iterator, id_iterator> r (
+  ids.equal_range (std::size_t (batch::deposit_id)));
+
+for (id_iterator i (r.first); i != r.second; ++i)
+{
+  const deposit& t (b.deposit ()[i->index]);
+  cerr << t.account () << " deposit " << t.amount () << endl;
+}
+  
+

2.9 Mapping for Global Elements

An XML Schema element definition is called global if it appears @@ -3729,7 +4140,7 @@ f (root& r)

is mapped to:

-class object: xml_schema::type
+class object: public xml_schema::type
 {
 public:
   // any
@@ -3807,7 +4218,7 @@ public:
   

is mapped to:

-class object: xml_schema::type
+class object: public xml_schema::type
 {
 public:
   // Accessors.
@@ -3888,7 +4299,7 @@ f (object& o, const xercesc::DOMElement& e)
   

is mapped to:

-class object: xml_schema::type
+class object: public xml_schema::type
 {
 public:
   // Type definitions.
@@ -4071,7 +4482,7 @@ f (object& o, const xercesc::DOMElement& e)
   

is mapped to:

-class object: xml_schema::type
+class object: public xml_schema::type
 {
 public:
   // Type definitions.
@@ -4314,8 +4725,84 @@ f (object& o, const xercesc::DOMElement& e)
 }
   
+

2.12.4 Element Wildcard Order

+ +

Similar to elements, element wildcards in ordered types + (Section 2.8.4, "Element Order") are assigned + content ids and are included in the content order sequence. + Continuing with the bank transactions example started in Section + 2.8.4, we can extend the batch by allowing custom transactions:

+ +
+<complexType name="batch">
+  <choice minOccurs="0" maxOccurs="unbounded">
+    <element name="withdraw" type="withdraw"/>
+    <element name="deposit" type="deposit"/>
+    <any namespace="##other" processContents="lax"/>
+  </choice>
+</complexType>
+  
+ +

This will lead to the following changes in the generated + batch C++ class:

+ +
+class batch: public xml_schema::type
+{
+public:
+  ...
+
+  // any
+  //
+  typedef element_sequence any_sequence;
+  typedef any_sequence::iterator any_iterator;
+  typedef any_sequence::const_iterator any_const_iterator;
+
+  static const std::size_t any_id = 3UL;
+
+  const any_sequence&
+  any () const;
+
+  any_sequence&
+  any ();
+
+  void
+  any (const any_sequence&);
+
+  ...
+};
+  
+ +

With this change we also need to update the iteration code to handle + the new content id:

+ +
+for (batch::content_order_const_iterator i (b.content_order ().begin ());
+     i != b.content_order ().end ();
+     ++i)
+{
+  switch (i->id)
+  {
+    ...
+
+  case batch::any_id:
+    {
+      const DOMElement& e (b.any ()[i->index]);
+      ...
+      break;
+    }
+
+    ...
+  }
+}
+  
+ +

For the complete working code that shows the use of wildcards in + ordered types refer to the order/element example in + the examples/cxx/tree/ directory in the XSD + distribution.

-

2.12.4 Mapping for anyAttribute

+

2.12.5 Mapping for anyAttribute

For anyAttribute the type definitions consist of an alias of the container type with name any_attribute_set @@ -4353,7 +4840,7 @@ f (object& o, const xercesc::DOMElement& e)

is mapped to:

-class object: xml_schema::type
+class object: public xml_schema::type
 {
 public:
   // Type definitions.
@@ -4594,14 +5081,138 @@ f (object& o, const xercesc::DOMAttr& a)
 
   

2.13 Mapping for Mixed Content Models

-

XML Schema mixed content models do not have a direct C++ mapping. - Instead, information in XML instance documents, corresponding to - a mixed content model, can be accessed using generic DOM nodes that - can optionally be associated with object model nodes. See - Section 5.1, "DOM Association" for more - information about keeping association with DOM nodes. -

+

For XML Schema types with mixed content models C++/Tree provides + mapping support only if the type is marked as ordered + (Section 2.8.4, "Element Order"). Use the + --ordered-type-mixed XSD compiler option to + automatically mark all types with mixed content as ordered.

+ +

For an ordered type with mixed content, C++/Tree adds an extra + text content sequence that is used to store the text fragments. + This text content sequence is also assigned a content id and + its entries are included in the content order sequence, just + like elements. As a result, it is possible to capture the order + between elements and text fragments.

+ +

As an example, consider the following schema that describes text + with embedded links:

+ +
+<complexType name="anchor">
+  <simpleContent>
+    <extension base="string">
+      <attribute name="href" type="anyURI" use="required"/>
+    </extension>
+  </simpleContent>
+</complexType>
+
+<complexType name="text" mixed="true">
+  <sequence>
+    <element name="a" type="anchor" minOccurs="0" maxOccurs="unbounded"/>
+  </sequence>
+</complexType>
+  
+ +

The generated text C++ class will provide the following + API (assuming it is marked as ordered):

+ +
+class text: public xml_schema::type
+{
+public:
+  // a
+  //
+  typedef anchor a_type;
+  typedef sequence<a_type> a_sequence;
+  typedef a_sequence::iterator a_iterator;
+  typedef a_sequence::const_iterator a_const_iterator;
+
+  static const std::size_t a_id = 1UL;
+
+  const a_sequence&
+  a () const;
+
+  a_sequence&
+  a ();
+
+  void
+  a (const a_sequence&);
+
+  // text_content
+  //
+  typedef xml_schema::string text_content_type;
+  typedef sequence<text_content_type> text_content_sequence;
+  typedef text_content_sequence::iterator text_content_iterator;
+  typedef text_content_sequence::const_iterator text_content_const_iterator;
+
+  static const std::size_t text_content_id = 2UL;
+
+  const text_content_sequence&
+  text_content () const;
+
+  text_content_sequence&
+  text_content ();
+
+  void
+  text_content (const text_content_sequence&);
+
+  // content_order
+  //
+  typedef xml_schema::content_order content_order_type;
+  typedef std::vector<content_order_type> content_order_sequence;
+  typedef content_order_sequence::iterator content_order_iterator;
+  typedef content_order_sequence::const_iterator content_order_const_iterator;
+
+  const content_order_sequence&
+  content_order () const;
+
+  content_order_sequence&
+  content_order ();
+
+  void
+  content_order (const content_order_sequence&);
+
+  ...
+};
+  
+ +

Given this interface we can iterate over both link elements + and text in content order. The following code fragment converts + our format to plain text with references.

+ +
+const text& t = ...
+
+for (text::content_order_const_iterator i (t.content_order ().begin ());
+     i != t.content_order ().end ();
+     ++i)
+{
+  switch (i->id)
+  {
+  case text::a_id:
+    {
+      const anchor& a (t.a ()[i->index]);
+      cerr << a << "[" << a.href () << "]";
+      break;
+    }
+  case text::text_content_id:
+    {
+      const xml_schema::string& s (t.text_content ()[i->index]);
+      cerr << s;
+      break;
+    }
+  default:
+    {
+      assert (false); // Unknown content id.
+    }
+  }
+}
+  
+

For the complete working code that shows the use of mixed content + in ordered types refer to the order/mixed example in + the examples/cxx/tree/ directory in the XSD + distribution.

@@ -5835,7 +6446,7 @@ XMLPlatformUtils::Terminate ();

Maintaining DOM association is normally useful when the application needs access to XML constructs that are not preserved in the - object model, for example, text in the mixed content model. + object model, for example, XML comments. Another useful aspect of DOM association is the ability of the application to navigate the document tree using the generic DOM interface (for example, with the help of an XPath processor) @@ -5845,7 +6456,7 @@ XMLPlatformUtils::Terminate (); be ignored during serialization. If you need to not only access but also modify some aspects of XML that are not preserved in the object model, then type customization with custom parsing - constructs and serialization operators should be used instead.

+ constructors and serialization operators should be used instead.

To request DOM association you will need to pass the xml_schema::flags::keep_dom flag to one of the -- cgit v1.1