diff options
Diffstat (limited to 'documentation/cxx/parser/guide/index.xhtml')
-rw-r--r-- | documentation/cxx/parser/guide/index.xhtml | 5236 |
1 files changed, 5236 insertions, 0 deletions
diff --git a/documentation/cxx/parser/guide/index.xhtml b/documentation/cxx/parser/guide/index.xhtml new file mode 100644 index 0000000..3f589a1 --- /dev/null +++ b/documentation/cxx/parser/guide/index.xhtml @@ -0,0 +1,5236 @@ +<?xml version="1.0" encoding="iso-8859-1"?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"> + +<head> + <title>Embedded C++/Parser Mapping Getting Started Guide</title> + + <meta name="copyright" content="© 2005-2009 Code Synthesis Tools CC"/> + <meta name="keywords" content="xsd,xml,schema,c++,mapping,data,binding,parser,validation,embedded,mobile"/> + <meta name="description" content="Embedded C++/Parser Mapping Getting Started Guide"/> + + <link rel="stylesheet" type="text/css" href="../../../default.css" /> + +<style type="text/css"> + pre { + padding : 0 0 0 0em; + margin : 0em 0em 0em 0; + + font-size : 102% + } + + body { + min-width: 48em; + } + + h1 { + font-weight: bold; + font-size: 200%; + line-height: 1.2em; + } + + h2 { + font-weight : bold; + font-size : 150%; + + padding-top : 0.8em; + } + + h3 { + font-size : 140%; + padding-top : 0.8em; + } + + /* Adjust indentation for three levels. 
*/ + #container { + max-width: 48em; + } + + #content { + padding: 0 0.1em 0 4em; + /*background-color: red;*/ + } + + #content h1 { + margin-left: -2.06em; + } + + #content h2 { + margin-left: -1.33em; + } + + /* Title page */ + + #titlepage { + padding: 2em 0 1em 0; + border-bottom: 1px solid black; + } + + #titlepage .title { + font-weight: bold; + font-size: 200%; + text-align: center; + } + + #titlepage #first-title { + padding: 1em 0 0.4em 0; + } + + #titlepage #second-title { + padding: 0.4em 0 2em 0; + } + + /* Lists */ + ul.list li { + padding-top : 0.3em; + padding-bottom : 0.3em; + } + + ol.steps { + padding-left : 1.8em; + } + + ol.steps li { + padding-top : 0.3em; + padding-bottom : 0.3em; + } + + + div.img { + text-align: center; + padding: 2em 0 2em 0; + } + + /* */ + dl dt { + padding : 0.8em 0 0 0; + } + + /* TOC */ + table.toc { + border-style : none; + border-collapse : separate; + border-spacing : 0; + + margin : 0.2em 0 0.2em 0; + padding : 0 0 0 0; + } + + table.toc tr { + padding : 0 0 0 0; + margin : 0 0 0 0; + } + + table.toc * td, table.toc * th { + border-style : none; + margin : 0 0 0 0; + vertical-align : top; + } + + table.toc * th { + font-weight : normal; + padding : 0em 0.1em 0em 0; + text-align : left; + white-space : nowrap; + } + + table.toc * table.toc th { + padding-left : 1em; + } + + table.toc * td { + padding : 0em 0 0em 0.7em; + text-align : left; + } + + /* Built-in table */ + #builtin { + margin: 2em 0 2em 0; + + border-collapse : collapse; + border : 1px solid; + border-color : #000000; + + font-size : 11px; + line-height : 14px; + } + + #builtin th, #builtin td { + border: 1px solid; + padding : 0.9em 0.9em 0.7em 0.9em; + } + + #builtin th { + background : #cde8f6; + } + + #builtin td { + text-align: left; + } + + /* XML Schema features table. 
*/ + #features { + margin: 2em 0 2em 0; + + border-collapse : collapse; + border : 1px solid; + border-color : #000000; + + font-size : 11px; + line-height : 14px; + } + + #features th, #features td { + border: 1px solid; + padding : 0.6em 0.6em 0.6em 0.6em; + } + + #features th { + background : #cde8f6; + } + + #features td { + text-align: left; + } +</style> + + +</head> + +<body> +<div id="container"> + <div id="content"> + + <div class="noprint"> + + <div id="titlepage"> + <div class="title" id="first-title">Embedded C++/Parser Mapping</div> + <div class="title" id="second-title">Getting Started Guide</div> + + <p>Copyright © 2005-2009 CODE SYNTHESIS TOOLS CC</p> + + <p>Permission is granted to copy, distribute and/or modify this + document under the terms of the + <a href="http://www.codesynthesis.com/licenses/fdl-1.2.txt">GNU Free + Documentation License, version 1.2</a>; with no Invariant Sections, + no Front-Cover Texts and no Back-Cover Texts. + </p> + + <p>This document is available in the following formats: + <a href="http://www.codesynthesis.com/projects/xsde/documentation/cxx/parser/guide/index.xhtml">XHTML</a>, + <a href="http://www.codesynthesis.com/projects/xsde/documentation/cxx/parser/guide/cxx-parser-e-guide.pdf">PDF</a>, and + <a href="http://www.codesynthesis.com/projects/xsde/documentation/cxx/parser/guide/cxx-parser-e-guide.ps">PostScript</a>.</p> + + </div> + + <h1>Table of Contents</h1> + + <table class="toc"> + <tr> + <th></th><td><a href="#0">Preface</a> + <table class="toc"> + <tr><th></th><td><a href="#0.1">About This Document</a></td></tr> + <tr><th></th><td><a href="#0.2">More Information</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>1</th><td><a href="#1">Introduction</a> + <table class="toc"> + <tr><th>1.1</th><td><a href="#1.1">Mapping Overview</a></td></tr> + <tr><th>1.2</th><td><a href="#1.2">Benefits</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>2</th><td><a href="#2">Hello World Example</a> + <table 
class="toc"> + <tr><th>2.1</th><td><a href="#2.1">Writing XML Document and Schema</a></td></tr> + <tr><th>2.2</th><td><a href="#2.2">Translating Schema to C++</a></td></tr> + <tr><th>2.3</th><td><a href="#2.3">Implementing Application Logic</a></td></tr> + <tr><th>2.4</th><td><a href="#2.4">Compiling and Running</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>3</th><td><a href="#3">Parser Skeletons</a> + <table class="toc"> + <tr><th>3.1</th><td><a href="#3.1">Implementing the Gender Parser</a></td></tr> + <tr><th>3.2</th><td><a href="#3.2">Implementing the Person Parser</a></td></tr> + <tr><th>3.3</th><td><a href="#3.3">Implementing the People Parser</a></td></tr> + <tr><th>3.4</th><td><a href="#3.4">Connecting the Parsers Together</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>4</th><td><a href="#4">Type Maps</a> + <table class="toc"> + <tr><th>4.1</th><td><a href="#4.1">Object Model</a></td></tr> + <tr><th>4.2</th><td><a href="#4.2">Type Map File Format</a></td></tr> + <tr><th>4.3</th><td><a href="#4.3">Parser Implementations</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>5</th><td><a href="#5">Mapping Configuration</a> + <table class="toc"> + <tr><th>5.1</th><td><a href="#5.1">Standard Template Library</a></td></tr> + <tr><th>5.2</th><td><a href="#5.2">Input/Output Stream Library</a></td></tr> + <tr><th>5.3</th><td><a href="#5.3">C++ Exceptions</a></td></tr> + <tr><th>5.4</th><td><a href="#5.4">XML Schema Validation</a></td></tr> + <tr><th>5.5</th><td><a href="#5.5">64-bit Integer Type</a></td></tr> + <tr><th>5.6</th><td><a href="#5.6">Parser Reuse</a></td></tr> + <tr><th>5.7</th><td><a href="#5.7">Support for Polymorphism</a></td></tr> + <tr><th>5.8</th><td><a href="#5.8">A Minimal Example</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>6</th><td><a href="#6">Built-In XML Schema Type Parsers</a> + <table class="toc"> + <tr><th>6.1</th><td><a href="#6.1"><code>QName</code> Parser</a></td></tr> + <tr><th>6.2</th><td><a 
href="#6.2"><code>NMTOKENS</code> and <code>IDREFS</code> Parsers</a></td></tr> + <tr><th>6.3</th><td><a href="#6.3"><code>base64Binary</code> and <code>hexBinary</code> Parsers</a></td></tr> + <tr><th>6.4</th><td><a href="#6.4">Time Zone Representation</a></td></tr> + <tr><th>6.5</th><td><a href="#6.5"><code>date</code> Parser</a></td></tr> + <tr><th>6.6</th><td><a href="#6.6"><code>dateTime</code> Parser</a></td></tr> + <tr><th>6.7</th><td><a href="#6.7"><code>duration</code> Parser</a></td></tr> + <tr><th>6.8</th><td><a href="#6.8"><code>gDay</code> Parser</a></td></tr> + <tr><th>6.9</th><td><a href="#6.9"><code>gMonth</code> Parser</a></td></tr> + <tr><th>6.10</th><td><a href="#6.10"><code>gMonthDay</code> Parser</a></td></tr> + <tr><th>6.11</th><td><a href="#6.11"><code>gYear</code> Parser</a></td></tr> + <tr><th>6.12</th><td><a href="#6.12"><code>gYearMonth</code> Parser</a></td></tr> + <tr><th>6.13</th><td><a href="#6.13"><code>time</code> Parser</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th>7</th><td><a href="#7">Document Parser and Error Handling</a> + <table class="toc"> + <tr><th>7.1</th><td><a href="#7.1">Document Parser</a></td></tr> + <tr><th>7.2</th><td><a href="#7.2">Exceptions</a></td></tr> + <tr><th>7.3</th><td><a href="#7.3">Error Codes</a></td></tr> + <tr><th>7.4</th><td><a href="#7.4">Reusing Parsers after an Error</a></td></tr> + </table> + </td> + </tr> + + <tr> + <th></th><td><a href="#A">Appendix A — Supported XML Schema Constructs</a></td> + </tr> + + </table> + </div> + + <h1><a name="0">Preface</a></h1> + + <h2><a name="0.1">About This Document</a></h2> + + <p>The goal of this document is to provide you with an + understanding of the C++/Parser programming model and allow you + to efficiently evaluate XSD/e against your project's technical + requirements. As such, this document is intended for embedded + C++ developers and software architects who are looking for an + embedded XML processing solution. 
Prior experience with XML + and C++ is required to understand this document. Basic + understanding of XML Schema is advantageous but not expected + or required. + </p> + + + <h2><a name="0.2">More Information</a></h2> + + <p>Beyond this guide, you may also find the following sources of + information useful:</p> + + <ul class="list"> + <li><a href="http://www.codesynthesis.com/projects/xsde/documentation/xsde.xhtml">XSD/e + Compiler Command Line Manual</a></li> + + <li>The <code>INSTALL</code> file in the XSD/e distribution provides + build instructions for various platforms.</li> + + <li>The <code>examples/cxx/parser/</code> directory in the XSD/e + distribution contains a collection of examples and a README + file with an overview of each example.</li> + + <li>The <a href="http://www.codesynthesis.com/mailman/listinfo/xsde-users">xsde-users</a> + mailing list is the place to ask technical questions about XSD/e and the + Embedded C++/Parser mapping. Furthermore, the + <a href="http://www.codesynthesis.com/pipermail/xsde-users/">archives</a> + may already have answers to some of your questions.</li> + + </ul> + + <!-- Introduction --> + + <h1><a name="1">1 Introduction</a></h1> + + <p>Welcome to CodeSynthesis XSD/e and the Embedded C++/Parser mapping. + XSD/e is a validating XML parser/serializer generator for mobile and + embedded systems. Embedded C++/Parser is a W3C XML Schema to C++ + mapping that represents an XML vocabulary as a set of parser + skeletons which you can implement to perform XML processing as + required by your application logic. + </p> + + <h2><a name="1.1">1.1 Mapping Overview</a></h2> + + <p>The Embedded C++/Parser mapping provides event-driven, stream-oriented + XML parsing, XML Schema validation, and C++ data binding. It was + specifically designed and optimized for mobile and embedded + systems where hardware constraints require high efficiency and + economical use of resources. 
As a result, the generated + parsers are 2-10 times faster than general-purpose validating + XML parsers while at the same time maintaining extremely low static + and dynamic memory footprints. For example, a validating parser + executable can be as small as 120KB in size. The size can be + further reduced by disabling support for XML Schema validation. + </p> + + <p>The generated code and the runtime library are also highly-portable + and, in their minimal configuration, can be used without STL, RTTI, + iostream, C++ exceptions, and C++ templates.</p> + + <p>To speed up application development, the C++/Parser mapping + can be instructed to generate sample parser implementations + and a test driver which can then be filled with the application + logic code. The mapping also provides a wide range of + mechanisms for controlling and customizing the generated code.</p> + + <p>The next chapter shows how to create a simple application + that uses the Embedded C++/Parser mapping to parse, validate, + and extract data from a simple XML instance document. The + following chapters describe the Embedded C++/Parser mapping + in more detail.</p> + + <h2><a name="1.2">1.2 Benefits</a></h2> + + <p>Traditional XML access APIs such as Document Object Model (DOM) + or Simple API for XML (SAX) as well as general-purpose XML Schema + validators have a number of drawbacks that make them less suitable + for creating mobile and embedded XML processing applications. 
These + drawbacks include: + </p> + + <ul class="list"> + <li>Text-based representation results in inefficient use of + resources.</li> + + <li>Extra validation code that is not used by the application.</li> + + <li>Generic representation of XML in terms of elements, attributes, + and text forces an application developer to write a substantial + amount of bridging code that identifies and transforms pieces + of information encoded in XML to a representation more suitable + for consumption by the application logic.</li> + + <li>String-based flow control defers error detection to runtime. + It also reduces code readability and maintainability.</li> + + <li>Lack of type safety because all information is represented + as text.</li> + + <li>Resulting applications are hard to debug, change, and + maintain.</li> + </ul> + + <p>In contrast, statically-typed, vocabulary-specific parser + skeletons produced by the Embedded C++/Parser mapping use + native data representations (for example, integers are passed as + integers, not as text) and include validation code only for + XML Schema constructs that are used in the application. This + results in efficient use of resources and compact object code.</p> + + <p>Furthermore, the parser skeletons allow you to operate in your + domain terms instead of the generic elements, attributes, and + text. Static typing helps catch errors at compile-time rather + than at run-time. Automatic code generation frees you for more + interesting tasks (such as doing something useful with the + information stored in the XML documents) and minimizes the + effort needed to adapt your applications to changes in the + document structure. 
To summarize, the C++/Parser mapping has + the following key advantages over generic XML access APIs:</p> + + <ul class="list"> + <li><b>Ease of use.</b> The generated code hides all the complexity + associated with recreating the document structure, maintaining the + dispatch state, and converting the data from the text representation + to data types suitable for manipulation by the application logic. + Parser skeletons also provide a convenient mechanism for building + custom in-memory representations.</li> + + <li><b>Natural representation.</b> The generated parser skeletons + implement parser callbacks as virtual functions with names + corresponding to elements and attributes in XML. As a result, + you process the XML data using your domain vocabulary instead + of generic elements, attributes, and text. + </li> + + <li><b>Concise code.</b> With a separate parser skeleton for each + XML Schema type, the application implementation is + simpler and thus easier to read and understand.</li> + + <li><b>Safety.</b> The XML data is delivered to parser callbacks as + statically typed objects. The parser callbacks themselves are virtual + functions. This helps catch programming errors at compile-time + rather than at runtime.</li> + + <li><b>Maintainability.</b> Automatic code generation minimizes the + effort needed to adapt the application to changes in the + document structure. With static typing, the C++ compiler + can pin-point the places in the application code that need to be + changed.</li> + + <li><b>Efficiency.</b> The generated parser skeletons use native + data representations and combine data extraction, validation, + and even dispatching in a single step. 
This makes them much + more efficient than traditional architectures with separate + stages for validation and data extraction/dispatch.</li> + </ul> + + + <!-- Hello World Example --> + + + <h1><a name="2">2 Hello World Example</a></h1> + + <p>In this chapter we will examine how to parse a very simple XML + document using the XSD/e-generated C++/Parser skeletons. + + All the code presented in this chapter is based on the <code>hello</code> + example which can be found in the <code>examples/cxx/parser/</code> + directory of the XSD/e distribution.</p> + + <h2><a name="2.1">2.1 Writing XML Document and Schema</a></h2> + + <p>First, we need to get an idea about the structure + of the XML documents we are going to process. Our + <code>hello.xml</code>, for example, could look like this:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<hello> + + <greeting>Hello</greeting> + + <name>sun</name> + <name>earth</name> + <name>world</name> + +</hello> + </pre> + + <p>Then we can write a description of the above XML in the + XML Schema language and save it into <code>hello.xsd</code>:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> + + <xs:complexType name="hello"> + <xs:sequence> + <xs:element name="greeting" type="xs:string"/> + <xs:element name="name" type="xs:string" maxOccurs="unbounded"/> + </xs:sequence> + </xs:complexType> + + <xs:element name="hello" type="hello"/> + +</xs:schema> + </pre> + + <p>Even if you are not familiar with the XML Schema language, it + should be easy to connect declarations in <code>hello.xsd</code> + to elements in <code>hello.xml</code>. The <code>hello</code> type + is defined as a sequence of the nested <code>greeting</code> and + <code>name</code> elements. Note that the term sequence in XML + Schema means that elements should appear in a particular order + as opposed to appearing multiple times. 
The <code>name</code> + element has its <code>maxOccurs</code> property set to + <code>unbounded</code> which means it can appear multiple times + in an XML document. Finally, the globally-defined <code>hello</code> + element prescribes the root element for our vocabulary. For an + easily-approachable introduction to XML Schema refer to + <a href="http://www.w3.org/TR/xmlschema-0/">XML Schema Part 0: + Primer</a>.</p> + + <p>The above schema is a specification of our vocabulary; it tells + everybody what valid XML instances of our vocabulary should look + like. The next step is to compile this schema to generate C++ parser + skeletons.</p> + + <h2><a name="2.2">2.2 Translating Schema to C++</a></h2> + + <p>Now we are ready to translate our <code>hello.xsd</code> to C++ parser + skeletons. To do this we invoke the XSD/e compiler from a terminal + (UNIX) or a command prompt (Windows): + </p> + + <pre class="terminal"> +$ xsde cxx-parser hello.xsd + </pre> + + <p>The XSD/e compiler produces two C++ files: <code>hello-pskel.hxx</code> + and <code>hello-pskel.cxx</code>. The following code fragment is taken + from <code>hello-pskel.hxx</code>; it should give you an idea about what + gets generated: + </p> + + <pre class="c++"> +class hello_pskel +{ +public: + // Parser callbacks. Override them in your implementation. + // + virtual void + pre (); + + virtual void + greeting (const std::string&); + + virtual void + name (const std::string&); + + virtual void + post_hello (); + + // Parser construction API. + // + void + greeting_parser (xml_schema::string_pskel&); + + void + name_parser (xml_schema::string_pskel&); + + void + parsers (xml_schema::string_pskel& /* greeting */, + xml_schema::string_pskel& /* name */); + +private: + ... +}; + </pre> + + <p>The first four member functions shown above are called parser + callbacks. You would normally override them in your implementation + of the parser to do something useful. 
Let's go through all of + them one by one.</p> + + <p>The <code>pre()</code> function is an initialization callback. It is + called when a new element of type <code>hello</code> is about + to be parsed. You would normally use this function to allocate a new + instance of the resulting type or clear accumulators that are used + to gather information during parsing. The default implementation + of this function does nothing.</p> + + <p>The <code>post_hello()</code> function is a finalization callback. Its + name is constructed by adding the parser skeleton name to the + <code>post_</code> prefix. The finalization callback is called when + parsing of the element is complete and the result, if any, should + be returned. Note that in our case the return type of + <code>post_hello()</code> is <code>void</code> which means there + is nothing to return. More on parser return types later. + </p> + + <p>You may be wondering why the finalization callback is called + <code>post_hello()</code> instead of <code>post()</code> just + like <code>pre()</code>. The reason for this is that + finalization callbacks can have different return types and + result in function signature clashes across inheritance + hierarchies. To prevent this, the signatures of finalization + callbacks are made unique by adding the type name to their names.</p> + + <p>The <code>greeting()</code> and <code>name()</code> functions are + called when the <code>greeting</code> and <code>name</code> elements + have been parsed, respectively. Their arguments are of type + <code>std::string</code> and contain the data extracted from XML.</p> + + <p>The last three functions are for connecting parsers to each other. + For example, there is a predefined parser for built-in XML Schema type + <code>string</code> in the XSD/e runtime. 
We will be using + it to parse the contents of <code>greeting</code> and + <code>name</code> elements, as shown in the next section.</p> + + <h2><a name="2.3">2.3 Implementing Application Logic</a></h2> + + <p>At this point we have all the parts we need to do something useful + with the information stored in XML documents. The first step is + to implement the parser: + </p> + + <pre class="c++"> +#include <iostream> +#include "hello-pskel.hxx" + +class hello_pimpl: public hello_pskel +{ +public: + virtual void + greeting (const std::string& g) + { + greeting_ = g; + } + + virtual void + name (const std::string& n) + { + std::cout << greeting_ << ", " << n << "!" << std::endl; + } + +private: + std::string greeting_; +}; + </pre> + + <p>We left both <code>pre()</code> and <code>post_hello()</code> with the + default implementations; we don't have anything to initialize or + return. The rest is pretty straightforward: we store the greeting + in a member variable and later, when parsing names, use it to + say hello.</p> + + <p>An observant reader may ask what happens if the <code>name</code> + element comes before <code>greeting</code>? Don't we need to + make sure <code>greeting_</code> was initialized and report + an error otherwise? The answer is no, we don't have to do + any of this. The <code>hello_pskel</code> parser skeleton + performs validation of XML according to the schema from which + it was generated. As a result, it will check the order + of the <code>greeting</code> and <code>name</code> elements + and report an error if it is violated.</p> + + <p>Now it is time to put this parser implementation to work:</p> + + <pre class="c++"> +using namespace std; + +int +main (int argc, char* argv[]) +{ + try + { + // Construct the parser. + // + xml_schema::string_pimpl string_p; + hello_pimpl hello_p; + + hello_p.greeting_parser (string_p); + hello_p.name_parser (string_p); + + // Parse the XML instance. 
+ // + xml_schema::document_pimpl doc_p (hello_p, "hello"); + + hello_p.pre (); + doc_p.parse (argv[1]); + hello_p.post_hello (); + } + catch (const xml_schema::parser_exception& e) + { + cerr << argv[1] << ":" << e.line () << ":" << e.column () + << ": " << e.text () << endl; + return 1; + } +} + </pre> + + <p>The first part of this code snippet instantiates individual parsers + and assembles them into a complete vocabulary parser. + <code>xml_schema::string_pimpl</code> is an implementation of a parser + for built-in XML Schema type <code>string</code>. It is provided by + the XSD/e runtime along with parsers for other built-in types (for + more information on the built-in parsers see <a href="#6">Chapter 6, + "Built-In XML Schema Type Parsers"</a>). We use <code>string_pimpl</code> + to parse the <code>greeting</code> and <code>name</code> elements as + indicated by the calls to <code>greeting_parser()</code> and + <code>name_parser()</code>. + </p> + + <p>Then we instantiate a document parser (<code>doc_p</code>). The + first argument to its constructor is the parser for + the root element (<code>hello_p</code> in our case). The + second argument is the root element name. + </p> + + <p>The final piece is the calls to <code>pre()</code>, <code>parse()</code>, + and <code>post_hello()</code>. The call to <code>parse()</code> + performs the actual XML parsing while the calls to <code>pre()</code> and + <code>post_hello()</code> make sure that the parser for the root + element can perform proper initialization and cleanup.</p> + + <p>While our parser implementation and test driver are pretty small and + easy to write by hand, for bigger XML vocabularies it can be a + substantial effort. To help with this task XSD/e can automatically + generate sample parser implementations and a test driver from your + schemas. You can request the generation of a sample implementation with + empty function bodies by specifying the <code>--generate-noop-impl</code> + option. 
Or you can generate a sample implementation that prints the + data stored in XML by using the <code>--generate-print-impl</code> + option. To request the generation of a test driver you can use the + <code>--generate-test-driver</code> option. For more information + on these options refer to the + <a href="http://www.codesynthesis.com/projects/xsde/documentation/xsde.xhtml">XSD/e + Compiler Command Line Manual</a>. The <code>'generated'</code> example + in the XSD/e distribution shows the sample implementation generation + feature in action.</p> + + + <h2><a name="2.4">2.4 Compiling and Running</a></h2> + + <p>After saving all the parts from the previous section in + <code>driver.cxx</code>, we are ready to compile our first + application and run it on the test XML document. On UNIX + this can be done with the following commands: + </p> + + <pre class="terminal"> +$ c++ -I.../libxsde -c driver.cxx hello-pskel.cxx +$ c++ -o driver driver.o hello-pskel.o .../libxsde/xsde/libxsde.a +$ ./driver hello.xml +Hello, sun! +Hello, earth! +Hello, world! + </pre> + + <p>Here <code>.../libxsde</code> represents the path to the + <code>libxsde</code> directory in the XSD/e distribution. + We can also test the error handling. To test XML well-formedness + checking, we can try to parse <code>hello-pskel.hxx</code>:</p> + + <pre class="terminal"> +$ ./driver hello-pskel.hxx +hello-pskel.hxx:1:0: not well-formed (invalid token) + </pre> + + <p>We can also try to parse a valid XML but not from our + vocabulary, for example <code>hello.xsd</code>:</p> + + <pre class="terminal"> +$ ./driver hello.xsd +hello.xsd:2:57: unexpected element encountered + </pre> + + + <!-- Chapter 3 --> + + + <h1><a name="3">3 Parser Skeletons</a></h1> + + <p>As we have seen in the previous chapter, the XSD/e compiler generates + a parser skeleton class for each type defined in XML Schema. 
In + this chapter we will take a closer look at different functions + that comprise a parser skeleton as well as the way to connect + our implementations of these parser skeletons to create a complete + parser.</p> + + <p>In this and subsequent chapters we will use the following schema + that describes a collection of person records. We save it in + <code>people.xsd</code>:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> + + <xs:simpleType name="gender"> + <xs:restriction base="xs:string"> + <xs:enumeration value="male"/> + <xs:enumeration value="female"/> + </xs:restriction> + </xs:simpleType> + + <xs:complexType name="person"> + <xs:sequence> + <xs:element name="first-name" type="xs:string"/> + <xs:element name="last-name" type="xs:string"/> + <xs:element name="gender" type="gender"/> + <xs:element name="age" type="xs:short"/> + </xs:sequence> + </xs:complexType> + + <xs:complexType name="people"> + <xs:sequence> + <xs:element name="person" type="person" maxOccurs="unbounded"/> + </xs:sequence> + </xs:complexType> + + <xs:element name="people" type="people"/> + +</xs:schema> + </pre> + + <p>A sample XML instance to go along with this schema is saved + in <code>people.xml</code>:</p> + + <pre class="xml"> +<?xml version="1.0"?> +<people> + <person> + <first-name>John</first-name> + <last-name>Doe</last-name> + <gender>male</gender> + <age>32</age> + </person> + <person> + <first-name>Jane</first-name> + <last-name>Doe</last-name> + <gender>female</gender> + <age>28</age> + </person> +</people> + </pre> + + <p>Compiling <code>people.xsd</code> with the XSD/e compiler results + in three parser skeletons being generated: <code>gender_pskel</code>, + <code>person_pskel</code>, and <code>people_pskel</code>. 
We are going + to examine and implement each of them in the subsequent sections.</p> + + <h2><a name="3.1">3.1 Implementing the Gender Parser</a></h2> + + <p>The generated <code>gender_pskel</code> parser skeleton looks like + this:</p> + + <pre class="c++"> +class gender_pskel: public xml_schema::string_pskel +{ +public: + gender_pskel (xml_schema::string_pskel* base_impl); + + // Parser callbacks. Override them in your implementation. + // + virtual void + pre (); + + virtual void + post_gender (); +}; + </pre> + + <p>Notice that <code>gender_pskel</code> inherits from + <code>xml_schema::string_pskel</code> which is a parser skeleton + for built-in XML Schema type <code>string</code> and is + predefined in the XSD/e runtime library. This is an example + of the general rule that parser skeletons follow: if a type + in XML Schema inherits from another then there will be an + equivalent inheritance between the corresponding parser + skeleton classes. The <code>gender_pskel</code> class also + declares a constructor which expects a pointer to the base + parser skeleton. We will discuss the purpose of this + constructor shortly.</p> + + <p>The <code>pre()</code> and <code>post_gender()</code> callbacks + should look familiar from the previous chapter. Let's now + implement the parser. Our implementation will simply print + the gender to <code>cout</code>:</p> + + + <pre class="c++"> +class gender_pimpl: public gender_pskel +{ +public: + gender_pimpl () + : gender_pskel (&base_impl_) + { + } + + virtual void + post_gender () + { + std::string s = post_string (); + cout << "gender: " << s << endl; + } + +private: + xml_schema::string_pimpl base_impl_; +}; + </pre> + + <p>While the code is quite short, there is a lot going on. First, + notice that we define a member variable <code>base_impl_</code> + of type <code>xml_schema::string_pimpl</code> and then pass + it to the <code>gender_pskel</code>'s constructor. 
We have + encountered <code>xml_schema::string_pimpl</code> already; it is an + implementation of the <code>xml_schema::string_pskel</code> parser + skeleton for built-in XML Schema type <code>string</code>. By + passing <code>base_impl_</code> to the <code>gender_pskel</code>'s + constructor we provide an implementation for the part of the + parser skeleton that is inherited from <code>string_pskel</code>.</p> + + <p>This is another common theme in the C++/Parser programming model: + reusing implementations of the base parsers in the derived ones. + In our case, <code>string_pimpl</code> will do all the dirty work + of extracting the data and we can just get it at the end with the + call to <code>post_string()</code>. For more information on parser + implementation reuse refer to <a href="#5.6">Section 5.6, + "Parser Reuse"</a>.</p> + + <p>In case you are curious, here are the definitions for + <code>xml_schema::string_pskel</code> and + <code>xml_schema::string_pimpl</code>:</p> + + <pre class="c++"> +namespace xml_schema +{ + class string_pskel: public parser_simple_content + { + public: + virtual std::string + post_string () = 0; + }; + + class string_pimpl: public string_pskel + { + public: + virtual void + _pre (); + + virtual void + _characters (const xml_schema::ro_string&); + + virtual std::string + post_string (); + + protected: + std::string str_; + }; +} + </pre> + + <p>There are three new pieces in this code that we haven't seen yet. + Those are the <code>parser_simple_content</code> class and + the <code>_pre()</code> and <code>_characters()</code> functions. + The <code>parser_simple_content</code> class is defined in the XSD/e + runtime and is a base class for all parser skeletons that conform + to the simple content model in XML Schema. Types with the + simple content model cannot have nested elements—only text + and attributes. 
There is also the <code>parser_complex_content</code> + class which corresponds to the complex content model (types with + nested elements, for example, <code>person</code> from + <code>people.xsd</code>).</p> + + <p>The <code>_pre()</code> function is a parser callback. Remember we + talked about the <code>pre()</code> and <code>post_*()</code> callbacks + in the previous chapter? There are actually two more callbacks + with similar roles: <code>_pre()</code> and <code>_post()</code>. + As a result, each parser skeleton has four special callbacks:</p> + + <pre class="c++"> + virtual void + pre (); + + virtual void + _pre (); + + virtual void + _post (); + + virtual void + post_name (); + </pre> + + <p><code>pre()</code> and <code>_pre()</code> are initialization + callbacks. They get called in that order before a new instance of the type + is about to be parsed. The difference between <code>pre()</code> and + <code>_pre()</code> is conventional: <code>pre()</code> can + be completely overridden by a derived parser. The derived + parser can also override <code>_pre()</code> but has to always call + the original version. This allows you to partition initialization + into customizable and required parts.</p> + + <p>Similarly, <code>_post()</code> and <code>post_name()</code> are + finalization callbacks with exactly the same semantics: + <code>post_name()</code> can be completely overridden by the derived + parser while the original <code>_post()</code> should always be called. + </p> + + <p>The final bit we need to discuss in this section is the + <code>_characters()</code> function. As you might have guessed, it + is also a callback. A low-level one that delivers raw character content + for the type being parsed. You will seldom need to use this callback + directly. 
Using implementations for the built-in parsers provided by + the XSD/e runtime is usually a simpler and more convenient + alternative.</p> + + <p>At this point you might be wondering why some <code>post_*()</code> + callbacks, for example <code>post_string()</code>, return some data + while others, for example <code>post_gender()</code>, have + <code>void</code> as a return type. This is a valid concern + and it will be addressed in the next chapter.</p> + + <h2><a name="3.2">3.2 Implementing the Person Parser</a></h2> + + <p>The generated <code>person_pskel</code> parser skeleton looks like + this:</p> + + <pre class="c++"> +class person_pskel: public xml_schema::parser_complex_content +{ +public: + // Parser callbacks. Override them in your implementation. + // + virtual void + pre (); + + virtual void + first_name (const std::string&); + + virtual void + last_name (const std::string&); + + virtual void + gender (); + + virtual void + age (short); + + virtual void + post_person (); + + // Parser construction API. + // + void + first_name_parser (xml_schema::string_pskel&); + + void + last_name_parser (xml_schema::string_pskel&); + + void + gender_parser (gender_pskel&); + + void + age_parser (xml_schema::short_pskel&); + + void + parsers (xml_schema::string_pskel& /* first-name */, + xml_schema::string_pskel& /* last-name */, + gender_pskel& /* gender */, + xml_schema::short_pskel& /* age */); +}; + </pre> + + + <p>As you can see, we have a parser callback for each of the nested + elements found in the <code>person</code> XML Schema type. 
+ The implementation of this parser is straightforward:</p> + + <pre class="c++"> +class person_pimpl: public person_pskel +{ +public: + virtual void + first_name (const std::string& f) + { + cout << "first: " << f << endl; + } + + virtual void + last_name (const std::string& l) + { + cout << "last: " << l << endl; + } + + virtual void + age (short a) + { + cout << "age: " << a << endl; + } +}; + </pre> + + <p>Notice that we didn't override the <code>gender()</code> callback + because all the printing is done by <code>gender_pimpl</code>.</p> + + <h2><a name="3.3">3.3 Implementing the People Parser</a></h2> + + <p>The generated <code>people_pskel</code> parser skeleton looks like + this:</p> + + <pre class="c++"> +class people_pskel: public xml_schema::parser_complex_content +{ +public: + // Parser callbacks. Override them in your implementation. + // + virtual void + pre (); + + virtual void + person (); + + virtual void + post_people (); + + // Parser construction API. + // + void + person_parser (person_pskel&); + + void + parsers (person_pskel& /* person */); +}; + </pre> + + <p>The <code>person()</code> callback will be called after parsing each + <code>person</code> element. While <code>person_pimpl</code> does + all the printing, one useful thing we can do in this callback is to + print an extra newline after each person record so that our + output is more readable:</p> + + <pre class="c++"> +class people_pimpl: public people_pskel +{ +public: + virtual void + person () + { + cout << endl; + } +}; + </pre> + + <p>Now it is time to put everything together.</p> + + + <h2><a name="3.4">3.4 Connecting the Parsers Together</a></h2> + + <p>At this point we have all the individual parsers implemented + and can proceed to assemble them into a complete parser + for our XML vocabulary. 
The first step is to instantiate + all the individual parsers that we will need:</p> + + <pre class="c++"> +xml_schema::short_pimpl short_p; +xml_schema::string_pimpl string_p; + +gender_pimpl gender_p; +person_pimpl person_p; +people_pimpl people_p; + </pre> + + <p>Notice that our schema uses two built-in XML Schema types: + <code>string</code> for the <code>first-name</code> and + <code>last-name</code> elements as well as <code>short</code> + for <code>age</code>. We will use predefined parsers that + come with the XSD/e runtime to handle these types. The next + step is to connect all the individual parsers. We do this + with the help of functions defined in the parser + skeletons and marked with the "Parser Construction API" + comment. One way to do it is to connect each individual + parser by calling the <code>*_parser()</code> functions:</p> + + <pre class="c++"> +person_p.first_name_parser (string_p); +person_p.last_name_parser (string_p); +person_p.gender_parser (gender_p); +person_p.age_parser (short_p); + +people_p.person_parser (person_p); + </pre> + + <p>You might be wondering what happens if you do not provide + a parser by not calling one of the <code>*_parser()</code> functions. + In that case the corresponding XML content will be skipped, + including validation. This is an efficient way to ignore parts + of the document that you are not interested in.</p> + + + <p>An alternative, shorter, way to connect the parsers is by using + the <code>parsers()</code> function, which connects all the parsers + for a given type at once:</p> + + <pre class="c++"> +person_p.parsers (string_p, string_p, gender_p, short_p); +people_p.parsers (person_p); + </pre> + + <p>The following figure illustrates the resulting connections. 
Notice + the correspondence between return types of the <code>post_*()</code> + functions and argument types of element callbacks that are connected + by the arrows.</p> + + <!-- align=center is needed for html2ps --> + <div class="img" align="center"><img src="figure-1.png" alt="Connections between the parsers"/></div> + + <p>The last step is the construction of the document parser and + invocation of the complete parser on our sample XML instance:</p> + + <pre class="c++"> +xml_schema::document_pimpl doc_p (people_p, "people"); + +people_p.pre (); +doc_p.parse ("people.xml"); +people_p.post_people (); + </pre> + + <p>Let's consider <code>xml_schema::document_pimpl</code> in + more detail. While the exact definition of this class + varies depending on the mapping configuration, here is + the part relevant to our example:</p> + + <pre class="c++"> +namespace xml_schema +{ + class document_pimpl + { + public: + document_pimpl (xml_schema::parser_base&, + const std::string& root_element_name); + + document_pimpl (xml_schema::parser_base&, + const std::string& root_element_namespace, + const std::string& root_element_name); + + void + parse (const std::string& file); + + void + parse (std::istream&); + + void + parse (const void* data, size_t size, bool last); + }; +} + </pre> + + <p><code>xml_schema::document_pimpl</code> is a root parser for + the vocabulary. The first argument to its constructors is the + parser for the type of the root element (<code>people_pimpl</code> + in our case). Because a type parser is only concerned with + the element's content and not with the element's name, we need + to specify the root element name somewhere. That's + what is passed as the second and third arguments to the + <code>document_pimpl</code>'s constructors.</p> + + <p>There are also three overloaded <code>parse()</code> functions + defined in the <code>document_pimpl</code> class. The first version + parses a local file identified by a name. The second version + reads the data from an input stream. 
The last version allows + you to parse the data directly from a buffer, one chunk at a + time. You can call this function multiple times with the final + call having the <code>last</code> argument set to true. For more + information on the <code>xml_schema::document_pimpl</code> class + refer to <a href="#7">Chapter 7, "Document Parser and Error + Handling"</a>.</p> + + <p>Let's now consider a step-by-step list of actions that happen + as we parse through <code>people.xml</code>. The content of + <code>people.xml</code> is repeated below for convenience.</p> + + <pre class="xml"> +<?xml version="1.0"?> +<people> + <person> + <first-name>John</first-name> + <last-name>Doe</last-name> + <gender>male</gender> + <age>32</age> + </person> + <person> + <first-name>Jane</first-name> + <last-name>Doe</last-name> + <gender>female</gender> + <age>28</age> + </person> +</people> + </pre> + + + <ol class="steps"> + <li><code>people_p.pre()</code> is called from + <code>main()</code>. We did not provide any implementation + for this callback so this call is a no-op.</li> + + <li><code>doc_p.parse("people.xml")</code> is called from + <code>main()</code>. The parser opens the file and starts + parsing its content.</li> + + <li>The parser encounters the root element. <code>doc_p</code> + verifies that the root element is correct and calls + <code>_pre()</code> on <code>people_p</code> which is also + a no-op. Parsing is now delegated to <code>people_p</code>.</li> + + <li>The parser encounters the <code>person</code> element. + <code>people_p</code> determines that <code>person_p</code> + is responsible for parsing this element. <code>pre()</code> + and <code>_pre()</code> callbacks are called on <code>person_p</code>. + Parsing is now delegated to <code>person_p</code>.</li> + + <li>The parser encounters the <code>first-name</code> element. + <code>person_p</code> determines that <code>string_p</code> + is responsible for parsing this element. 
<code>pre()</code> + and <code>_pre()</code> callbacks are called on <code>string_p</code>. + Parsing is now delegated to <code>string_p</code>.</li> + + <li>The parser encounters character content consisting of + <code>"John"</code>. The <code>_characters()</code> callback is + called on <code>string_p</code>.</li> + + <li>The parser encounters the end of the <code>first-name</code> + element. The <code>_post()</code> and <code>post_string()</code> + callbacks are called on <code>string_p</code>. The + <code>first_name()</code> callback is called on <code>person_p</code> + with the return value of <code>post_string()</code>. The + <code>first_name()</code> implementation prints + <code>"first: John"</code> to <code>cout</code>. + Parsing is now returned to <code>person_p</code>.</li> + + <li>Steps analogous to 5-7 are performed for the <code>last-name</code>, + <code>gender</code>, and <code>age</code> elements.</li> + + <li>The parser encounters the end of the <code>person</code> + element. The <code>_post()</code> and <code>post_person()</code> + callbacks are called on <code>person_p</code>. The + <code>person()</code> callback is called on <code>people_p</code>. + The <code>person()</code> implementation prints a new line + to <code>cout</code>. Parsing is now returned to + <code>people_p</code>.</li> + + <li>Steps 4-9 are performed for the second <code>person</code> + element.</li> + + <li>The parser encounters the end of the <code>people</code> + element. The <code>_post()</code> callback is called on + <code>people_p</code>. The <code>doc_p.parse("people.xml")</code> + call returns to <code>main()</code>.</li> + + <li><code>people_p.post_people()</code> is called from + <code>main()</code> which is a no-op.</li> + + </ol> + + + <!-- Chapter 4 --> + + + <h1><a name="4">4 Type Maps</a></h1> + + <p>There are many useful things you can do inside parser callbacks as they + are right now. 
There are, however, times when you want to propagate + some information from one parser to another or to the caller of the + parser. One common task that would greatly benefit from such a + possibility is building a tree-like in-memory object model of the + data stored in XML. During execution, each individual sub-parser + would create a sub-tree and return it to its <em>parent</em> parser + which can then incorporate this sub-tree into the whole tree.</p> + + <p>In this chapter we will discuss the mechanisms offered by the + C++/Parser mapping for returning information from individual + parsers and see how to use them to build an object model + of our people vocabulary.</p> + + <h2><a name="4.1">4.1 Object Model</a></h2> + + <p>An object model for our person record example could + look like this (saved in the <code>people.hxx</code> file):</p> + + <pre class="c++"> +#include <string> +#include <vector> + +enum gender +{ + male, + female +}; + +class person +{ +public: + person (const std::string& first, + const std::string& last, + ::gender gender, + short age) + : first_ (first), last_ (last), + gender_ (gender), age_ (age) + { + } + + const std::string& + first () const + { + return first_; + } + + const std::string& + last () const + { + return last_; + } + + ::gender + gender () const + { + return gender_; + } + + short + age () const + { + return age_; + } + +private: + std::string first_; + std::string last_; + ::gender gender_; + short age_; +}; + +typedef std::vector<person> people; + </pre> + + <p>While it is clear which parser is responsible for which part of + the object model, it is not exactly clear how, for + example, <code>gender_pimpl</code> will deliver <code>gender</code> + to <code>person_pimpl</code>. You might have noticed that + <code>string_pimpl</code> manages to deliver its value to the + <code>first_name()</code> callback of <code>person_pimpl</code>. 
Let's + see how we can utilize the same mechanism to propagate our + own data.</p> + + <p>There is a way to tell the XSD/e compiler that you want to + exchange data between parsers. More precisely, for each + type defined in XML Schema, you can tell the compiler two things. + First, the return type of the <code>post_*()</code> callback + in the parser skeleton generated for this type. And, second, + the argument type for callbacks corresponding to elements and + attributes of this type. For example, for XML Schema type + <code>gender</code> we can specify the return type for + <code>post_gender()</code> in the <code>gender_pskel</code> + skeleton and the argument type for the <code>gender()</code> callback + in the <code>person_pskel</code> skeleton. As you might have guessed, + the generated code will then pass the return value from the + <code>post_*()</code> callback as an argument to the element or + attribute callback.</p> + + <p>The way to tell the XSD/e compiler about these XML Schema to + C++ mappings is with type map files. Here is a simple type + map for the <code>gender</code> type from the previous paragraph.</p> + + <pre class="type-map"> +include "people.hxx"; +gender ::gender ::gender; + </pre> + + <p>The first line indicates that the generated code must include + <code>people.hxx</code> in order to get the definition for the + <code>gender</code> type. The second line specifies that both + argument and return types for the <code>gender</code> + XML Schema type should be the <code>::gender</code> C++ enum + (we use fully-qualified C++ names to avoid name clashes). + The next section will describe the type map format in detail. 
+ We save this type map in <code>people.map</code> and + then translate our schemas with the <code>--type-map</code> + option to let the XSD/e compiler know about our type map:</p> + + <pre class="terminal"> +$ xsde cxx-parser --type-map people.map people.xsd + </pre> + + <p>If we now look at the generated <code>people-pskel.hxx</code>, + we will see the following changes in the <code>gender_pskel</code> and + <code>person_pskel</code> skeletons:</p> + + <pre class="c++"> +#include "people.hxx" + +class gender_pskel: public xml_schema::string_pskel +{ + virtual ::gender + post_gender () = 0; + + ... +}; + +class person_pskel: public xml_schema::parser_complex_content +{ + virtual void + gender (::gender); + + ... +}; + </pre> + + <p>Notice that <code>#include "people.hxx"</code> was added to + the generated header file from the type map to provide the + definition for the <code>gender</code> enum.</p> + + <h2><a name="4.2">4.2 Type Map File Format</a></h2> + + <p>Type map files are used to define a mapping between XML Schema + and C++ types. The compiler uses this information + to determine return types of <code>post_*()</code> + callbacks in parser skeletons corresponding to XML Schema + types as well as argument types for callbacks corresponding + to elements and attributes of these types.</p> + + <p>The compiler has a set of predefined mapping rules that map the + built-in XML Schema types to suitable C++ types (discussed + below) and all other types to <code>void</code>. + By providing your own type maps you can override these predefined + rules. 
The format of the type map file is presented below: + </p> + + <pre class="type-map"> +namespace <schema-namespace> [<cxx-namespace>] +{ + (include <file-name>;)* + ([type] <schema-type> <cxx-ret-type> [<cxx-arg-type>];)* +} + </pre> + + <p>Both <code><i><schema-namespace></i></code> and + <code><i><schema-type></i></code> are regex patterns while + <code><i><cxx-namespace></i></code>, + <code><i><cxx-ret-type></i></code>, and + <code><i><cxx-arg-type></i></code> are regex pattern + substitutions. All names can be optionally enclosed in + <code>" "</code>, for example, to include white-spaces.</p> + + <p><code><i><schema-namespace></i></code> determines XML + Schema namespace. Optional <code><i><cxx-namespace></i></code> + is prefixed to every C++ type name in this namespace declaration. + <code><i><cxx-ret-type></i></code> is a C++ type name that is + used as a return type for the <code>post_*()</code> callback. + Optional <code><i><cxx-arg-type></i></code> is an argument + type for callbacks corresponding to elements and attributes + of this type. If <code><i><cxx-arg-type></i></code> is not + specified, it defaults to <code><i><cxx-ret-type></i></code> + if <code><i><cxx-ret-type></i></code> ends with <code>*</code> or + <code>&</code> (that is, it is a pointer or a reference) and + <code>const <i><cxx-ret-type></i>&</code> + otherwise. + <code><i><file-name></i></code> is a file name either in the + <code>" "</code> or <code>< ></code> format + and is added with the <code>#include</code> directive to + the generated code.</p> + + <p>The <code><b>#</b></code> character starts a comment that ends + with a new line or end of file. To specify a name that contains + <code><b>#</b></code> enclose it in <code><b>" "</b></code>. + For example:</p> + + <pre> +namespace http://www.example.com/xmlns/my my +{ + include "my.hxx"; + + # Pass apples by value. + # + apple apple; + + # Pass oranges as pointers. 
+ # + orange orange_t*; +} + </pre> + + <p>In the example above, for the + <code>http://www.example.com/xmlns/my#orange</code> + XML Schema type, the <code>my::orange_t*</code> C++ type will + be used as both return and argument types.</p> + + <p>Several namespace declarations can be specified in a single + file. The namespace declaration can also be completely + omitted to map types in a schema without a namespace. For + instance:</p> + + <pre class="type-map"> +include "my.hxx"; +apple apple; + +namespace http://www.example.com/xmlns/my +{ + orange "const orange_t*"; +} + </pre> + + <p>The compiler has a number of predefined mapping rules for + the built-in XML Schema types which can be presented as the + following map files:</p> + + <pre class="type-map"> +namespace http://www.w3.org/2001/XMLSchema +{ + boolean bool bool; + + byte "signed char" "signed char"; + unsignedByte "unsigned char" "unsigned char"; + + short short short; + unsignedShort "unsigned short" "unsigned short"; + + int int int; + unsignedInt "unsigned int" "unsigned int"; + + long "long long" "long long"; + unsignedLong "unsigned long long" "unsigned long long"; + + integer long long; + + negativeInteger long long; + nonPositiveInteger long long; + + positiveInteger "unsigned long" "unsigned long"; + nonNegativeInteger "unsigned long" "unsigned long"; + + float float float; + double double double; + decimal double double; + + NMTOKENS xml_schema::string_sequence*; + IDREFS xml_schema::string_sequence*; + + base64Binary xml_schema::buffer*; + hexBinary xml_schema::buffer*; + + date xml_schema::date; + dateTime xml_schema::date_time; + duration xml_schema::duration; + gDay xml_schema::gday; + gMonth xml_schema::gmonth; + gMonthDay xml_schema::gmonth_day; + gYear xml_schema::gyear; + gYearMonth xml_schema::gyear_month; + time xml_schema::time; +} + </pre> + + <p>If STL is enabled (<a href="#5.1">Section 5.1, "Standard Template + Library"</a>), the following mapping is used for the string-based + 
XML Schema built-in types:</p> + + <pre class="type-map"> +namespace http://www.w3.org/2001/XMLSchema +{ + include <string>; + + string std::string; + normalizedString std::string; + token std::string; + Name std::string; + NMTOKEN std::string; + NCName std::string; + ID std::string; + IDREF std::string; + language std::string; + anyURI std::string; + + QName xml_schema::qname; +} + </pre> + + <p>Otherwise, a C string-based mapping is used:</p> + + <pre class="type-map"> +namespace http://www.w3.org/2001/XMLSchema +{ + string char*; + normalizedString char*; + token char*; + Name char*; + NMTOKEN char*; + NCName char*; + ID char*; + IDREF char*; + language char*; + anyURI char*; + + QName xml_schema::qname*; +} + </pre> + + <p>For more information about the mapping of the built-in XML Schema types + to C++ types refer to <a href="#6">Chapter 6, "Built-In XML Schema Type + Parsers"</a>. The last predefined rule maps anything that wasn't + mapped by previous rules to <code>void</code>:</p> + + <pre class="type-map"> +namespace .* +{ + .* void void; +} + </pre> + + + <p>When you provide your own type maps with the + <code>--type-map</code> option, they are evaluated first. This + allows you to selectively override any + of the predefined rules. Note also that if you change the mapping + of a built-in XML Schema type then it becomes your responsibility + to provide the corresponding parser skeleton and implementation + in the <code>xml_schema</code> namespace. You can include the + custom definitions into the generated header file using the + <code>--hxx-prologue-*</code> options.</p> + + <h2><a name="4.3">4.3 Parser Implementations</a></h2> + + <p>With the knowledge from the previous section, we can proceed + with creating a type map that maps types in the <code>people.xsd</code> + schema to our object model classes in + <code>people.hxx</code>. In fact, we already have the beginning + of our type map file in <code>people.map</code>. 
Let's extend + it with the rest of the types:</p> + + <pre class="type-map"> +include "people.hxx"; + +gender ::gender ::gender; +person ::person; +people ::people; + </pre> + + <p>A few things to note about this type map. We did not + provide the argument types for <code>person</code> and + <code>people</code> because the default constant reference is + exactly what we need. We also did not provide any mappings + for built-in XML Schema types <code>string</code> and + <code>short</code> because they are handled by the predefined + rules and we are happy with the result. Note also that + all C++ types are fully qualified. This is done to avoid + potential name conflicts in the generated code. Now we can + recompile our schema and move on to implementing the parsers:</p> + + <pre class="terminal"> +$ xsde cxx-parser --type-map people.map people.xsd + </pre> + + <p>Here is the implementation of our three parsers in full. One + way to save typing when implementing your own parsers is + to open the generated code and copy the signatures of parser + callbacks into your code. Or you could always auto generate the + sample implementations and fill them with your code.</p> + + <pre class="c++"> +#include "people-pskel.hxx" + +class gender_pimpl: public gender_pskel +{ +public: + gender_pimpl () + : gender_pskel (&base_impl_) + { + } + + virtual ::gender + post_gender () + { + return post_string () == "male" ? 
male : female; + } + +private: + xml_schema::string_pimpl base_impl_; +}; + +class person_pimpl: public person_pskel +{ +public: + virtual void + first_name (const std::string& f) + { + first_ = f; + } + + virtual void + last_name (const std::string& l) + { + last_ = l; + } + + virtual void + gender (::gender g) + { + gender_ = g; + } + + virtual void + age (short a) + { + age_ = a; + } + + virtual ::person + post_person () + { + return ::person (first_, last_, gender_, age_); + } + +private: + std::string first_; + std::string last_; + ::gender gender_; + short age_; +}; + +class people_pimpl: public people_pskel +{ +public: + virtual void + person (const ::person& p) + { + people_.push_back (p); + } + + virtual ::people + post_people () + { + ::people r; + r.swap (people_); + return r; + } + +private: + ::people people_; +}; + </pre> + + <p>This code fragment should look familiar by now. Just note that + all the <code>post_*()</code> callbacks now have return types instead + of <code>void</code>. Here is the implementation of the test + driver for this example:</p> + + <pre class="c++"> +#include <iostream> + +using namespace std; + +int +main (int argc, char* argv[]) +{ + // Construct the parser. + // + xml_schema::short_pimpl short_p; + xml_schema::string_pimpl string_p; + + gender_pimpl gender_p; + person_pimpl person_p; + people_pimpl people_p; + + person_p.parsers (string_p, string_p, gender_p, short_p); + people_p.parsers (person_p); + + // Parse the document to obtain the object model. + // + xml_schema::document_pimpl doc_p (people_p, "people"); + + people_p.pre (); + doc_p.parse (argv[1]); + people ppl = people_p.post_people (); + + // Print the object model. + // + for (people::iterator i (ppl.begin ()); i != ppl.end (); ++i) + { + cout << "first: " << i->first () << endl + << "last: " << i->last () << endl + << "gender: " << (i->gender () == male ? 
"male" : "female") << endl + << "age: " << i->age () << endl + << endl; + } +} + </pre> + + <p>The parser creation and assembly part is exactly the same as in + the previous chapter. The parsing part is a bit different: + <code>post_people()</code> now has a return value which is the + complete object model. We store it in the + <code>ppl</code> variable. The last bit of the code simply iterates + over the <code>people</code> vector and prints the information + for each person. We save the last two code fragments to + <code>driver.cxx</code> and proceed to compile and test + our new application:</p> + + + <pre class="terminal"> +$ c++ -I.../libxsde -c driver.cxx people-pskel.cxx +$ c++ -o driver driver.o people-pskel.o .../libxsde/xsde/libxsde.a +$ ./driver people.xml +first: John +last: Doe +gender: male +age: 32 + +first: Jane +last: Doe +gender: female +age: 28 + </pre> + + + <!-- Mapping Configuration --> + + + <h1><a name="5">5 Mapping Configuration</a></h1> + + <p>The Embedded C++/Parser mapping has a number of configuration + parameters that determine the overall properties and behavior + of the generated code, such as the use of Standard Template + Library (STL), Input/Output Stream Library (iostream), C++ + exceptions, XML Schema validation, 64-bit integer types, parser + implementation reuse styles, and support for XML Schema polymorphism. + Previous chapters assumed that the use of STL, iostream, C++ + exceptions, and XML Schema validation were enabled. + This chapter will discuss the changes in the Embedded C++/Parser + programming model that result from the changes to these configuration + parameters. 
A complete example that uses the minimal mapping + configuration is presented at the end of this chapter.</p> + + <p>In order to enable or disable a particular feature, the corresponding + configuration parameter should be set accordingly in the XSD/e runtime + library as well as specified during schema compilation with the XSD/e + command line options as described in the + <a href="http://www.codesynthesis.com/projects/xsde/documentation/xsde.xhtml">XSD/e + Compiler Command Line Manual</a>. + </p> + + <p>While the XML documents can use various encodings, the Embedded + C++/Parser mapping always delivers character data to the application + in the UTF-8 encoding. The underlying XML parser used by the + Embedded C++/Parser mapping includes built-in support for XML + documents encoded in UTF-8, UTF-16, ISO-8859-1, and US-ASCII. + Other encodings can be supported by providing application-specific + decoder functions.</p> + + <h2><a name="5.1">5.1 Standard Template Library</a></h2> + + <p>To disable the use of STL you will need to configure the XSD/e + runtime without support for STL as well as pass the + <code>--no-stl</code> option to the XSD/e compiler when + translating your schemas. When STL is disabled, all string-based + XML Schema types are mapped to C-style <code>char*</code> instead + of <code>std::string</code>, as described in + <a href="#4.2">Section 4.2, "Type Map File Format"</a>. The + following code fragment shows changes in the + signatures of <code>first_name()</code> and <code>last_name()</code> + callbacks from the person record example.</p> + + <pre class="c++"> +class person_pskel +{ +public: + virtual void + first_name (char*); + + virtual void + last_name (char*); + + ... +}; + </pre> + + <p>Note that it is your responsibility to eventually release the memory + associated with these strings using operator <code>delete[]</code>. 
+ </p> + + <h2><a name="5.2">5.2 Input/Output Stream Library</a></h2> + + <p>To disable the use of iostream you will need to configure the + XSD/e runtime library without support for iostream as well as + pass the <code>--no-iostream</code> option to the XSD/e compiler + when translating your schemas. When iostream is disabled, the + following two <code>parse()</code> functions in the + <code>xml_schema::document_pimpl</code> class become unavailable:</p> + + <pre class="c++"> + void + parse (const std::string& file); + + void + parse (std::istream&); + </pre> + + <p>This leaves you with only one function of the form:</p> + + <pre class="c++"> + void + parse (const void* data, size_t size, bool last); + </pre> + + <p>See <a href="#7.1">Section 7.1, "Document Parser"</a> + for more information on the semantics of these functions.</p> + + <h2><a name="5.3">5.3 C++ Exceptions</a></h2> + + <p>To disable the use of C++ exceptions, you will need to configure + the XSD/e runtime without support for exceptions as well as pass + the <code>--no-exceptions</code> option to the XSD/e compiler + when translating your schemas. When C++ exceptions are disabled, + the error conditions are indicated with error codes instead of + exceptions, as described in <a href="#7.3">Section 7.3, + "Error Codes"</a>. + </p> + + <h2><a name="5.4">5.4 XML Schema Validation</a></h2> + + <p>To disable support for XML Schema validation, you will need to + configure the XSD/e runtime accordingly as well as pass + the <code>--suppress-validation</code> option to the XSD/e compiler + when translating your schemas. Disabling XML Schema validation + allows you to further increase the parsing performance and + reduce footprint in cases where XML instances are known to be + valid. 
+ </p> + + <h2><a name="5.5">5.5 64-bit Integer Type</a></h2> + + <p>By default the 64-bit <code>long</code> and <code>unsignedLong</code> + XML Schema built-in types are mapped to the 64-bit <code>long long</code> + and <code>unsigned long long</code> fundamental C++ types. To + disable the use of these types in the mapping you will need to + configure the XSD/e runtime accordingly as well as pass + the <code>--no-long-long</code> option to the XSD/e compiler + when translating your schemas. When the use of 64-bit integral + C++ types is disabled the <code>long</code> and + <code>unsignedLong</code> XML Schema built-in types are mapped + to <code>long</code> and <code>unsigned long</code> fundamental + C++ types.</p> + + <h2><a name="5.6">5.6 Parser Reuse</a></h2> + + <p>When one type in XML Schema inherits from another, it is + often desirable to be able to reuse the parser implementation + corresponding to the base type in the parser implementation + corresponding to the derived type. XSD/e provides support + for two parser reuse styles: the so-called <em>mixin</em> + (generated when the <code>--reuse-style-mixin</code> option + is specified) and <em>tiein</em> (generated by default) styles.</p> + + <p>The compiler can also be instructed not to generate any support + for parser reuse with the <code>--reuse-style-none</code> option. + This is mainly useful to further reduce the generated code size + when your vocabulary does not use inheritance or when you plan + to implement each parser from scratch. Note also that the + XSD/e runtime should be configured in accordance with the + parser reuse style used in the generated code. 
The remainder + of this section discusses the mixin and tiein parser reuse + styles in more detail.</p> + + + <p>To provide concrete examples for each reuse style we will use the + following schema fragment:</p> + + <pre class="xml"> +<xs:complexType name="person"> + <xs:sequence> + <xs:element name="first-name" type="xs:string"/> + <xs:element name="last-name" type="xs:string"/> + <xs:element name="age" type="xs:short"/> + </xs:sequence> +</xs:complexType> + +<xs:complexType name="employee"> + <xs:complexContent> + <xs:extension base="person"> + <xs:sequence> + <xs:element name="position" type="xs:string"/> + <xs:element name="salary" type="xs:unsignedLong"/> + </xs:sequence> + </xs:extension> + </xs:complexContent> +</xs:complexType> + </pre> + + <p>The mixin parser reuse style uses the C++ mixin idiom that + relies on multiple and virtual inheritance. Because + virtual inheritance can result in a significant object + code size increase, this reuse style should be considered + when such an overhead is acceptable and/or the vocabulary + consists of only a handful of types. When the mixin reuse + style is used, the generated parser skeletons use virtual + inheritance, for example:</p> + + <pre class="c++"> +class person_pskel: public virtual parser_complex_content +{ + ... +}; + +class employee_pskel: public virtual person_pskel +{ + ... +}; + </pre> + + + <p>When you implement the base parser you also need to use + virtual inheritance. The derived parser is implemented + by inheriting from both the derived parser skeleton and + the base parser implementation (that is, <em>mixing in</em> + the base parser implementation), for example:</p> + + <pre class="c++"> +class person_pimpl: public virtual person_pskel +{ + ... +}; + +class employee_pimpl: public employee_pskel, + public person_pimpl +{ + ... 
+}; + </pre> + + + <p>The tiein parser reuse style uses delegation and normally + results in a significantly smaller object code while being + almost as convenient to use as the mixin style. When the + tiein reuse style is used, the generated derived parser + skeleton declares a constructor which allows you to specify + the implementation of the base parser:</p> + + <pre class="c++"> +class person_pskel: public parser_complex_content +{ + ... +}; + +class employee_pskel: public person_pskel +{ +public: + employee_pskel (person_pskel* base_impl); + + ... +}; + </pre> + + <p>If you pass the implementation of the base parser to this + constructor then the generated code will transparently + forward all the callbacks corresponding to the base parser + skeleton to this implementation. You can also pass + <code>0</code> to this constructor in which case you will + need to implement the derived parser from scratch. The + following example shows how we could implement the + <code>person</code> and <code>employee</code> parsers + using the tiein style:</p> + + <pre class="c++"> +class person_pimpl: public person_pskel +{ + ... +}; + +class employee_pimpl: public employee_pskel +{ +public: + employee_pimpl () + : employee_pskel (&base_impl_) + { + } + + ... + +private: + person_pimpl base_impl_; +}; + </pre> + + <p>Note that you cannot use the <em>tied in</em> base parser + instance (<code>base_impl_</code> in the above code) for + parsing anything except the derived type.</p> + + <p>The ability to override the base parser callbacks in the + derived parser is also available in the tiein style. For + example, the following code fragment shows how we can + override the <code>age()</code> callback if we didn't + like the implementation provided by the base parser:</p> + + <pre class="c++"> +class employee_pimpl: public employee_pskel +{ +public: + employee_pimpl () + : employee_pskel (&base_impl_) + { + } + + virtual void + age (short a) + { + ... + } + + ... 
+ +private: + person_pimpl base_impl_; +}; + </pre> + + <p>In the above example the <code>age</code> element will be + handled by <code>employee_pimpl</code> while the <code>first-name</code> + and <code>last-name</code> callbacks will still go to + <code>base_impl_</code>.</p> + + <p>It is also possible to inherit from the base parser implementation + instead of declaring it as a member variable. This can be useful + if you need to access protected members in the base implementation + or need to override a virtual function that is not part of + the parser skeleton interface. Note, however, that in this case + you will need to resolve a number of ambiguities with explicit + qualifications or using-declarations. For example:</p> + + + <pre class="c++"> +class person_pimpl: public person_pskel +{ + ... +protected: + virtual person* + create () + { + return new person (); + } +}; + +class employee_pimpl: public employee_pskel, + public person_pimpl +{ +public: + employee_pimpl () + : employee_pskel (static_cast<person_pimpl*> (this)) + { + } + + // Resolve ambiguities. + // + using employee_pskel::parsers; + + ... + +protected: + virtual employee* + create () + { + return new employee (); + } +}; + </pre> + + + <h2><a name="5.7">5.7 Support for Polymorphism</a></h2> + + <p>By default the XSD/e compiler generates non-polymorphic code. If your + vocabulary uses XML Schema polymorphism in the form of <code>xsi:type</code> + and/or substitution groups, then you will need to configure the XSD/e + runtime with support for polymorphism, compile your schemas with the + <code>--generate-polymorphic</code> option to produce polymorphism-aware + code, as well as pass <code>true</code> as the last argument to the + <code>xml_schema::document_pimpl</code>'s constructors. If some of your + schemas do not require support for polymorphism then you can compile + them with the <code>--runtime-polymorphic</code> option and still + use the XSD/e runtime configured with polymorphism support. 
+ </p> + + <p>When using the polymorphism-aware generated code, you can specify + several parsers for a single element by passing a parser map + instead of an individual parser to the parser connection function + for the element. One of the parsers will then be looked up and used + depending on the <code>xsi:type</code> attribute value or an element + name from a substitution group. Consider the following schema as an + example:</p> + + <pre class="xml"> +<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> + + <xs:complexType name="person"> + <xs:sequence> + <xs:element name="name" type="xs:string"/> + </xs:sequence> + </xs:complexType> + + <!-- substitution group root --> + <xs:element name="person" type="person"/> + + <xs:complexType name="superman"> + <xs:complexContent> + <xs:extension base="person"> + <xs:attribute name="can-fly" type="xs:boolean"/> + </xs:extension> + </xs:complexContent> + </xs:complexType> + + <xs:element name="superman" + type="superman" + substitutionGroup="person"/> + + <xs:complexType name="batman"> + <xs:complexContent> + <xs:extension base="superman"> + <xs:attribute name="wing-span" type="xs:unsignedInt"/> + </xs:extension> + </xs:complexContent> + </xs:complexType> + + <xs:element name="batman" + type="batman" + substitutionGroup="superman"/> + + <xs:complexType name="supermen"> + <xs:sequence> + <xs:element ref="person" maxOccurs="unbounded"/> + </xs:sequence> + </xs:complexType> + + <xs:element name="supermen" type="supermen"/> + +</xs:schema> + </pre> + + <p>Conforming XML documents can use the <code>superman</code> + and <code>batman</code> types in place of the <code>person</code> + type either by specifying the type with the <code>xsi:type</code> + attributes or by using the elements from the substitution + group, for instance:</p> + + + <pre class="xml"> +<supermen xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + + <person> + <name>John Doe</name> + </person> + + <superman can-fly="false"> + <name>James "007" 
Bond</name> + </superman> + + <superman can-fly="true" wing-span="10" xsi:type="batman"> + <name>Bruce Wayne</name> + </superman> + +</supermen> + </pre> + + <p>To print the data stored in such XML documents we can implement + the parsers as follows:</p> + + <pre class="c++"> +class person_pimpl: public person_pskel +{ +public: + virtual void + pre () + { + cout << "starting to parse person" << endl; + } + + virtual void + name (const std::string& v) + { + cout << "name: " << v << endl; + } + + virtual void + post_person () + { + cout << "finished parsing person" << endl; + } +}; + +class superman_pimpl: public superman_pskel +{ +public: + superman_pimpl () + : superman_pskel (&base_impl_) + { + } + + virtual void + pre () + { + cout << "starting to parse superman" << endl; + } + + virtual void + can_fly (bool v) + { + cout << "can-fly: " << v << endl; + } + + virtual void + post_person () + { + post_superman (); + } + + virtual void + post_superman () + { + cout << "finished parsing superman" << endl; + } + +private: + person_pimpl base_impl_; +}; + +class batman_pimpl: public batman_pskel +{ +public: + batman_pimpl () + : batman_pskel (&base_impl_) + { + } + + virtual void + pre () + { + cout << "starting to parse batman" << endl; + } + + virtual void + wing_span (unsigned int v) + { + cout << "wing-span: " << v << endl; + } + + virtual void + post_person () + { + post_superman (); + } + + virtual void + post_superman () + { + post_batman (); + } + + virtual void + post_batman () + { + cout << "finished parsing batman" << endl; + } + +private: + superman_pimpl base_impl_; +}; + </pre> + + <p>Note that because the derived type parsers (<code>superman_pskel</code> + and <code>batman_pskel</code>) are called via the <code>person_pskel</code> + interface, we have to override the <code>post_person()</code> virtual + function in <code>superman_pimpl</code> and <code>batman_pimpl</code> + to call <code>post_superman()</code> and the <code>post_superman()</code> + virtual 
function in <code>batman_pimpl</code> to call + <code>post_batman()</code> (when the mixin parser reuse style is used + it is not necessary to override <code>post_person()</code> + in <code>batman_pimpl</code> since the suitable implementation + is inherited from <code>superman_pimpl</code>).</p> + + <p>The following code fragment shows how to connect the parsers together. + Notice that for the <code>person</code> element in the <code>supermen_p</code> + parser we specify a parser map instead of a specific parser and we pass + <code>true</code> as the last argument to the document parser constructor + to indicate that we are parsing potentially-polymorphic XML documents:</p> + + <pre class="c++"> +int +main (int argc, char* argv[]) +{ + // Construct the parser. + // + xml_schema::string_pimpl string_p; + xml_schema::boolean_pimpl boolean_p; + xml_schema::unsigned_int_pimpl unsigned_int_p; + + person_pimpl person_p; + superman_pimpl superman_p; + batman_pimpl batman_p; + + xml_schema::parser_map_impl person_map (5); // 5 hashtable buckets + supermen_pimpl supermen_p; + + person_p.parsers (string_p); + superman_p.parsers (string_p, boolean_p); + batman_p.parsers (string_p, boolean_p, unsigned_int_p); + + // Here we are specifying several parsers that can be used to + // parse the person element. + // + person_map.insert (person_p); + person_map.insert (superman_p); + person_map.insert (batman_p); + + supermen_p.person_parser (person_map); + + // Parse the XML document. The last argument to the document's + // constructor indicates that we are parsing polymorphic XML + // documents. + // + xml_schema::document_pimpl doc_p (supermen_p, "supermen", true); + + supermen_p.pre (); + doc_p.parse (argv[1]); + supermen_p.post_supermen (); +} + </pre> + + <p>When polymorphism-aware code is generated, each element's + <code>*_parser()</code> function is overloaded to also accept + an object of the <code>xml_schema::parser_map</code> type. 
+ For example, the <code>supermen_pskel</code> class from the + above example looks like this:</p> + + <pre class="c++"> +class supermen_pskel: public xml_schema::parser_complex_content +{ +public: + + ... + + // Parser construction API. + // + void + parsers (person_pskel&); + + // Individual element parsers. + // + void + person_parser (person_pskel&); + + void + person_parser (xml_schema::parser_map&); + + ... +}; + </pre> + + <p>Note that you can specify both the individual (static) parser and + the parser map. The individual parser will be used when the static + element type and the dynamic type of the object being parsed are + the same. This is the case, for example, when there is no + <code>xsi:type</code> attribute and the element hasn't been + substituted. Because the individual parser for an element is + cached and no map lookup is necessary, it makes sense to specify + both the individual parser and the parser map when most of the + objects being parsed are of the static type and optimal + performance is important. The following code fragment shows + how to change the above example to set both the individual + parser and the parser map:</p> + + <pre class="c++"> +int +main (int argc, char* argv[]) +{ + ... + + // Here we are specifying several parsers that can be used to + // parse the person element. + // + person_map.insert (superman_p); + person_map.insert (batman_p); + + supermen_p.person_parser (person_p); + supermen_p.person_parser (person_map); + + ... 
+} + </pre> + + + <p>The <code>xml_schema::parser_map</code> interface and the + <code>xml_schema::parser_map_impl</code> default implementation + are presented below:</p> + + <pre class="c++"> +namespace xml_schema +{ + class parser_map + { + public: + virtual parser_base* + find (const char* type) const = 0; + + virtual void + reset () const = 0; + }; + + class parser_map_impl: public parser_map + { + public: + parser_map_impl (size_t buckets); + + void + insert (parser_base&); + + virtual parser_base* + find (const char* type) const; + + virtual void + reset () const; + + private: + parser_map_impl (const parser_map_impl&); + + parser_map_impl& + operator= (const parser_map_impl&); + + ... + }; +} + </pre> + + <p>The <code>type</code> argument in the <code>find()</code> virtual + function is the type name and namespace from the xsi:type attribute + (the namespace prefix is resolved to the actual XML namespace) + or the type of an element from the substitution group in the form + <code>"<name> <namespace>"</code> with the space and the + namespace part absent if the type does not have a namespace. + You can obtain a parser's dynamic type in the same format + using the <code>_dynamic_type()</code> function. The static + type can be obtained by calling the static <code>_static_type()</code> + function, for example <code>person_pskel::_static_type()</code>. + Both functions return a C string (<code>const char*</code>) which + is valid for as long as the application is running. The + <code>reset()</code> virtual function is used to reset + the parsers contained in the map (as opposed to resetting or + clearing the map itself). For more information on parser + resetting refer to <a href="#7.4">Section 7.4, "Reusing Parsers + after an Error"</a>. 
The following example shows how we can + implement our own parser map using <code>std::map</code>:</p> + + + <pre class="c++"> +#include <map> +#include <string> + +class parser_map: public xml_schema::parser_map +{ +public: + void + insert (xml_schema::parser_base& p) + { + map_[p._dynamic_type ()] = &p; + } + + virtual xml_schema::parser_base* + find (const char* type) const + { + map::const_iterator i = map_.find (type); + return i != map_.end () ? i->second : 0; + } + + virtual void + reset () const + { + for (map::const_iterator i (map_.begin ()), e (map_.end ()); + i != e; ++i) + { + xml_schema::parser_base* p = i->second; + p->_reset (); + } + } + +private: + typedef std::map<std::string, xml_schema::parser_base*> map; + map map_; +}; + </pre> + + <p>The XSD/e runtime provides the default implementation for the + <code>xml_schema::parser_map</code> interface, + <code>xml_schema::parser_map_impl</code>, which is a hashmap. + It requires that you specify the number of buckets it will contain + and it does not support automatic table resizing. To obtain good + performance the elements to buckets ratio should be between 0.7 and + 0.9. It is also recommended to use prime numbers for bucket counts: + 53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, 49157, 98317, + 196613, 393241. + </p> + + <p>If C++ exceptions are disabled (<a href="#5.3">Section 5.3, + "C++ Exceptions"</a>), the <code>xml_schema::parser_map_impl</code> + class has the following additional error querying API. It can be used + to detect the out of memory errors after calls to the + <code>parser_map_impl</code>'s constructor and <code>insert()</code> + function.</p> + + <pre class="c++"> +namespace xml_schema +{ + class parser_map_impl: public parser_map + { + public: + enum error + { + error_none, + error_no_memory + }; + + error + _error () const; + + ... 
+ }; +} + </pre> + + <p>To support polymorphic parsing the XSD/e runtime and generated code + maintain a number of hashmaps that contain substitution and, if + XML Schema validation is enabled (<a href="#5.4">Section 5.4, + "XML Schema Validation"</a>), inheritance information. Because + the number of elements in these hashmaps depends on the schemas + being compiled and thus is fairly static, these hashmaps do not + perform automatic table resizing and instead the number of buckets + is specified when the XSD/e runtime is configured. To obtain good + performance the elements to buckets ratio in these hashmaps should + be between 0.7 and 0.9. The recommended way to ensure this range + is to add diagnostics code to your application as shown in the + following example:</p> + + <pre class="c++"> +int +main () +{ + // Check that the load in substitution and inheritance hashmaps + // is not too high. + // +#ifndef NDEBUG + float load = xml_schema::parser_smap_elements (); + load /= xml_schema::parser_smap_buckets (); + + if (load > 0.8) + { + cerr << "substitution hashmap load is " << load << endl; + cerr << "time to increase XSDE_PARSER_SMAP_BUCKETS" << endl; + } + + load = xml_schema::parser_imap_elements (); + load /= xml_schema::parser_imap_buckets (); + + if (load > 0.8) + { + cerr << "inheritance hashmap load is " << load << endl; + cerr << "time to increase XSDE_PARSER_IMAP_BUCKETS" << endl; + } +#endif + + ... +} + </pre> + + <p>Most of the code presented in this section is taken from the + <code>polymorphism</code> example which can be found in the + <code>examples/cxx/parser/</code> directory of the XSD/e distribution. 
+ Handling of <code>xsi:type</code> and substitution groups when used + on root elements requires a number of special actions as shown in + the <code>polyroot</code> example.</p> + + <h2><a name="5.8">5.8 A Minimal Example</a></h2> + + <p>The following example is a re-implementation of the person + records example presented in <a href="#3">Chapter 3, + "Parser Skeletons"</a>. It is intended to work + without STL, iostream, and C++ exceptions. It can be found in + the <code>examples/cxx/parser/minimal/</code> directory of the + XSD/e distribution. The <code>people.xsd</code> schema is + compiled with the <code>--no-stl</code>, <code>--no-iostream</code>, + and <code>--no-exceptions</code> options. The following listing + presents the implementation of parser skeletons and the test + driver in full.</p> + + <pre class="c++"> +#include <stdio.h> + +#include "people-pskel.hxx" + +class gender_pimpl: public gender_pskel +{ +public: + gender_pimpl () + : gender_pskel (&base_impl_) + { + } + + virtual void + post_gender () + { + char* s = post_string (); + printf ("gender: %s\n", s); + delete[] s; + } + +private: + xml_schema::string_pimpl base_impl_; +}; + +class person_pimpl: public person_pskel +{ +public: + virtual void + first_name (char* n) + { + printf ("first: %s\n", n); + delete[] n; + } + + virtual void + last_name (char* n) + { + printf ("last: %s\n", n); + delete[] n; + } + + virtual void + age (short a) + { + printf ("age: %hd\n", a); + } +}; + +class people_pimpl: public people_pskel +{ +public: + virtual void + person () + { + // Add an extra newline after each person record. + // + printf ("\n"); + } +}; + +int +main (int argc, char* argv[]) +{ + // Construct the parser. + // + xml_schema::short_pimpl short_p; + xml_schema::string_pimpl string_p; + + gender_pimpl gender_p; + person_pimpl person_p; + people_pimpl people_p; + + person_p.parsers (string_p, string_p, gender_p, short_p); + people_p.parsers (person_p); + + // Open the file. 
+ // + FILE* f = fopen (argv[1], "rb"); + + if (f == 0) + { + fprintf (stderr, "%s: unable to open\n", argv[1]); + return 1; + } + + // Parse. + // + typedef xml_schema::parser_error error; + error e; + bool io_error = false; + + do + { + xml_schema::document_pimpl doc_p (people_p, "people"); + if (e = doc_p._error ()) + break; + + people_p.pre (); + if (e = people_p._error ()) + break; + + char buf[4096]; + do + { + size_t s = fread (buf, 1, sizeof (buf), f); + + if (s != sizeof (buf) && ferror (f)) + { + io_error = true; + break; + } + + doc_p.parse (buf, s, feof (f) != 0); + e = doc_p._error (); + + } while (!e && !feof (f)); + + if (io_error || e) + break; + + people_p.post_people (); + e = people_p._error (); + + } while (false); + + fclose (f); + + // Handle errors. + // + + if (io_error) + { + fprintf (stderr, "%s: read failure\n", argv[1]); + return 1; + } + + if (e) + { + switch (e.type ()) + { + case error::sys: + { + fprintf (stderr, "%s: %s\n", argv[1], e.sys_text ()); + break; + } + case error::xml: + { + fprintf (stderr, "%s:%lu:%lu: %s\n", + argv[1], e.line (), e.column (), e.xml_text ()); + break; + } + case error::schema: + { + fprintf (stderr, "%s:%lu:%lu: %s\n", + argv[1], e.line (), e.column (), e.schema_text ()); + break; + } + case error::app: + { + fprintf (stderr, "%s:%lu:%lu: application error %d\n", + argv[1], e.line (), e.column (), e.app_code ()); + break; + } + default: + break; + } + return 1; + } + return 0; +} + </pre> + + + <!-- Built-in XML Schema Type Parsers --> + + + <h1><a name="6">6 Built-In XML Schema Type Parsers</a></h1> + + <p>The XSD/e runtime provides parser implementations for all built-in + XML Schema types as summarized in the following table. Declarations + for these types are automatically included into each generated + header file. 
As a result you don't need to include any headers + to gain access to these parser implementations.</p> + + <!-- border="1" is necessary for html2ps --> + <table id="builtin" border="1"> + <tr> + <th>XML Schema type</th> + <th>Parser implementation in the <code>xml_schema</code> namespace</th> + <th>Parser return type</th> + </tr> + + <tr> + <th colspan="3">anyType and anySimpleType types</th> + </tr> + <tr> + <td><code>anyType</code></td> + <td><code>any_type_pimpl</code></td> + <td><code>void</code></td> + </tr> + <tr> + <td><code>anySimpleType</code></td> + <td><code>any_simple_type_pimpl</code></td> + <td><code>void</code></td> + </tr> + + <tr> + <th colspan="3">fixed-length integral types</th> + </tr> + <!-- 8-bit --> + <tr> + <td><code>byte</code></td> + <td><code>byte_pimpl</code></td> + <td><code>signed char</code></td> + </tr> + <tr> + <td><code>unsignedByte</code></td> + <td><code>unsigned_byte_pimpl</code></td> + <td><code>unsigned char</code></td> + </tr> + + <!-- 16-bit --> + <tr> + <td><code>short</code></td> + <td><code>short_pimpl</code></td> + <td><code>short</code></td> + </tr> + <tr> + <td><code>unsignedShort</code></td> + <td><code>unsigned_short_pimpl</code></td> + <td><code>unsigned short</code></td> + </tr> + + <!-- 32-bit --> + <tr> + <td><code>int</code></td> + <td><code>int_pimpl</code></td> + <td><code>int</code></td> + </tr> + <tr> + <td><code>unsignedInt</code></td> + <td><code>unsigned_int_pimpl</code></td> + <td><code>unsigned int</code></td> + </tr> + + <!-- 64-bit --> + <tr> + <td><code>long</code></td> + <td><code>long_pimpl</code></td> + <td><code>long long</code> or <code>long</code><br/> + <a href="#5.5">Section 5.5, "64-bit Integer Type"</a></td> + </tr> + <tr> + <td><code>unsignedLong</code></td> + <td><code>unsigned_long_pimpl</code></td> + <td><code>unsigned long long</code> or + <code>unsigned long</code><br/> + <a href="#5.5">Section 5.5, "64-bit Integer Type"</a></td> + </tr> + + <tr> + <th colspan="3">arbitrary-length 
integral types</th> + </tr> + <tr> + <td><code>integer</code></td> + <td><code>integer_pimpl</code></td> + <td><code>long</code></td> + </tr> + <tr> + <td><code>nonPositiveInteger</code></td> + <td><code>non_positive_integer_pimpl</code></td> + <td><code>long</code></td> + </tr> + <tr> + <td><code>nonNegativeInteger</code></td> + <td><code>non_negative_integer_pimpl</code></td> + <td><code>unsigned long</code></td> + </tr> + <tr> + <td><code>positiveInteger</code></td> + <td><code>positive_integer_pimpl</code></td> + <td><code>unsigned long</code></td> + </tr> + <tr> + <td><code>negativeInteger</code></td> + <td><code>negative_integer_pimpl</code></td> + <td><code>long</code></td> + </tr> + + <tr> + <th colspan="3">boolean types</th> + </tr> + <tr> + <td><code>boolean</code></td> + <td><code>boolean_pimpl</code></td> + <td><code>bool</code></td> + </tr> + + <tr> + <th colspan="3">fixed-precision floating-point types</th> + </tr> + <tr> + <td><code>float</code></td> + <td><code>float_pimpl</code></td> + <td><code>float</code></td> + </tr> + <tr> + <td><code>double</code></td> + <td><code>double_pimpl</code></td> + <td><code>double</code></td> + </tr> + + <tr> + <th colspan="3">arbitrary-precision floating-point types</th> + </tr> + <tr> + <td><code>decimal</code></td> + <td><code>decimal_pimpl</code></td> + <td><code>double</code></td> + </tr> + + <tr> + <th colspan="3">string-based types</th> + </tr> + <tr> + <td><code>string</code></td> + <td><code>string_pimpl</code></td> + <td><code>std::string</code> or <code>char*</code><br/> + <a href="#5.1">Section 5.1, "Standard Template Library"</a></td> + </tr> + <tr> + <td><code>normalizedString</code></td> + <td><code>normalized_string_pimpl</code></td> + <td><code>std::string</code> or <code>char*</code><br/> + <a href="#5.1">Section 5.1, "Standard Template Library"</a></td> + </tr> + <tr> + <td><code>token</code></td> + <td><code>token_pimpl</code></td> + <td><code>std::string</code> or <code>char*</code><br/> + <a 
href="#5.1">Section 5.1, "Standard Template Library"</a></td> + </tr> + <tr> + <td><code>Name</code></td> + <td><code>name_pimpl</code></td> + <td><code>std::string</code> or <code>char*</code><br/> + <a href="#5.1">Section 5.1, "Standard Template Library"</a></td> + </tr> + <tr> + <td><code>NMTOKEN</code></td> + <td><code>nmtoken_pimpl</code></td> + <td><code>std::string</code> or <code>char*</code><br/> + <a href="#5.1">Section 5.1, "Standard Template Library"</a></td> + </tr> + <tr> + <td><code>NCName</code></td> + <td><code>ncname_pimpl</code></td> + <td><code>std::string</code> or <code>char*</code><br/> + <a href="#5.1">Section 5.1, "Standard Template Library"</a></td> + </tr> + + <tr> + <td><code>language</code></td> + <td><code>language_pimpl</code></td> + <td><code>std::string</code> or <code>char*</code><br/> + <a href="#5.1">Section 5.1, "Standard Template Library"</a></td> + </tr> + + <tr> + <th colspan="3">qualified name</th> + </tr> + <tr> + <td><code>QName</code></td> + <td><code>qname_pimpl</code></td> + <td><code>xml_schema::qname</code> or <code>xml_schema::qname*</code><br/> + <a href="#6.1">Section 6.1, "<code>QName</code> Parser"</a></td> + </tr> + + <tr> + <th colspan="3">ID/IDREF types</th> + </tr> + <tr> + <td><code>ID</code></td> + <td><code>id_pimpl</code></td> + <td><code>std::string</code> or <code>char*</code><br/> + <a href="#5.1">Section 5.1, "Standard Template Library"</a></td> + </tr> + <tr> + <td><code>IDREF</code></td> + <td><code>idref_pimpl</code></td> + <td><code>std::string</code> or <code>char*</code><br/> + <a href="#5.1">Section 5.1, "Standard Template Library"</a></td> + </tr> + + <tr> + <th colspan="3">list types</th> + </tr> + <tr> + <td><code>NMTOKENS</code></td> + <td><code>nmtokens_pimpl</code></td> + <td><code>xml_schema::string_sequence*</code><br/><a href="#6.2">Section + 6.2, "<code>NMTOKENS</code> and <code>IDREFS</code> Parsers"</a></td> + </tr> + <tr> + <td><code>IDREFS</code></td> + 
<td><code>idrefs_pimpl</code></td> + <td><code>xml_schema::string_sequence*</code><br/><a href="#6.2">Section + 6.2, "<code>NMTOKENS</code> and <code>IDREFS</code> Parsers"</a></td> + </tr> + + <tr> + <th colspan="3">URI types</th> + </tr> + <tr> + <td><code>anyURI</code></td> + <td><code>uri_pimpl</code></td> + <td><code>std::string</code> or <code>char*</code><br/> + <a href="#5.1">Section 5.1, "Standard Template Library"</a></td> + </tr> + + <tr> + <th colspan="3">binary types</th> + </tr> + <tr> + <td><code>base64Binary</code></td> + <td><code>base64_binary_pimpl</code></td> + <td><code>xml_schema::buffer*</code><br/> + <a href="#6.3">Section 6.3, "<code>base64Binary</code> and + <code>hexBinary</code> Parsers"</a></td> + </tr> + <tr> + <td><code>hexBinary</code></td> + <td><code>hex_binary_pimpl</code></td> + <td><code>xml_schema::buffer*</code><br/> + <a href="#6.3">Section 6.3, "<code>base64Binary</code> and + <code>hexBinary</code> Parsers"</a></td> + </tr> + + <tr> + <th colspan="3">date/time types</th> + </tr> + <tr> + <td><code>date</code></td> + <td><code>date_pimpl</code></td> + <td><code>xml_schema::date</code><br/><a href="#6.5">Section 6.5, + "<code>date</code> Parser"</a></td> + </tr> + <tr> + <td><code>dateTime</code></td> + <td><code>date_time_pimpl</code></td> + <td><code>xml_schema::date_time</code><br/><a href="#6.6">Section 6.6, + "<code>dateTime</code> Parser"</a></td> + </tr> + <tr> + <td><code>duration</code></td> + <td><code>duration_pimpl</code></td> + <td><code>xml_schema::duration</code><br/><a href="#6.7">Section 6.7, + "<code>duration</code> Parser"</a></td> + </tr> + <tr> + <td><code>gDay</code></td> + <td><code>gday_pimpl</code></td> + <td><code>xml_schema::gday</code><br/><a href="#6.8">Section 6.8, + "<code>gDay</code> Parser"</a></td> + </tr> + <tr> + <td><code>gMonth</code></td> + <td><code>gmonth_pimpl</code></td> + <td><code>xml_schema::gmonth</code><br/><a href="#6.9">Section 6.9, + "<code>gMonth</code> Parser"</a></td> + 
</tr> + <tr> + <td><code>gMonthDay</code></td> + <td><code>gmonth_day_pimpl</code></td> + <td><code>xml_schema::gmonth_day</code><br/><a href="#6.10">Section 6.10, + "<code>gMonthDay</code> Parser"</a></td> + </tr> + <tr> + <td><code>gYear</code></td> + <td><code>gyear_pimpl</code></td> + <td><code>xml_schema::gyear</code><br/><a href="#6.11">Section 6.11, + "<code>gYear</code> Parser"</a></td> + </tr> + <tr> + <td><code>gYearMonth</code></td> + <td><code>gyear_month_pimpl</code></td> + <td><code>xml_schema::gyear_month</code><br/><a href="#6.12">Section + 6.12, "<code>gYearMonth</code> Parser"</a></td> + </tr> + <tr> + <td><code>time</code></td> + <td><code>time_pimpl</code></td> + <td><code>xml_schema::time</code><br/><a href="#6.13">Section 6.13, + "<code>time</code> Parser"</a></td> + </tr> + + </table> + + <h2><a name="6.1">6.1 <code>QName</code> Parser</a></h2> + + <p>The return type of the <code>qname_pimpl</code> parser implementation + is either <code>xml_schema::qname</code> when STL is enabled + (<a href="#5.1">Section 5.1, "Standard Template Library"</a>) or + <code>xml_schema::qname*</code> when STL is disabled. The + <code>qname</code> class represents an XML qualified name. When the + return type is <code>xml_schema::qname*</code>, the returned + object is dynamically allocated with operator <code>new</code> + and should eventually be deallocated with operator <code>delete</code>. + With STL enabled, the <code>qname</code> type has the following + interface:</p> + + <pre class="c++"> +namespace xml_schema +{ + class qname + { + public: + // The default constructor creates an uninitialized object. + // Use modifiers to initialize it. 
+ // + qname (); + + explicit + qname (const std::string& name); + qname (const std::string& prefix, const std::string& name); + + void + swap (qname&); + + const std::string& + prefix () const; + + std::string& + prefix (); + + void + prefix (const std::string&); + + const std::string& + name () const; + + std::string& + name (); + + void + name (const std::string&); + }; + + bool + operator== (const qname&, const qname&); + + bool + operator!= (const qname&, const qname&); +} + </pre> + + <p>When STL is disabled and C++ exceptions are enabled + (<a href="#5.3">Section 5.3, "C++ Exceptions"</a>), the + <code>qname</code> type has the following interface:</p> + + <pre class="c++"> +namespace xml_schema +{ + class qname + { + public: + // The default constructor creates an uninitialized object. + // Use modifiers to initialize it. + // + qname (); + + explicit + qname (char* name); + qname (char* prefix, char* name); + + void + swap (qname&); + + private: + qname (const qname&); + + qname& + operator= (const qname&); + + public: + char* + prefix (); + + const char* + prefix () const; + + void + prefix (char*); + + void + prefix_copy (const char*); + + char* + prefix_detach (); + + public: + char* + name (); + + const char* + name () const; + + void + name (char*); + + void + name_copy (const char*); + + char* + name_detach (); + }; + + bool + operator== (const qname&, const qname&); + + bool + operator!= (const qname&, const qname&); +} +</pre> + + <p>The modifier functions and constructors that have the <code>char*</code> + argument assume ownership of the passed strings which should be allocated + with operator <code>new char[]</code> and will be deallocated with + operator <code>delete[]</code> by the <code>qname</code> object. + If you detach the underlying prefix or name strings, then they + should eventually be deallocated with operator <code>delete[]</code>. 
+ </p> + + <p>Finally, if both STL and C++ exceptions are disabled, the + <code>qname</code> type has the following interface:</p> + + <pre class="c++"> +namespace xml_schema +{ + class qname + { + public: + enum error + { + error_none, + error_no_memory + }; + + // The default constructor creates an uninitialized object. + // Use modifiers to initialize it. + // + qname (); + + explicit + qname (char* name); + qname (char* prefix, char* name); + + void + swap (qname&); + + private: + qname (const qname&); + + qname& + operator= (const qname&); + + public: + char* + prefix (); + + const char* + prefix () const; + + void + prefix (char*); + + error + prefix_copy (const char*); + + char* + prefix_detach (); + + public: + char* + name (); + + const char* + name () const; + + void + name (char*); + + error + name_copy (const char*); + + char* + name_detach (); + }; + + bool + operator== (const qname&, const qname&); + + bool + operator!= (const qname&, const qname&); +} + </pre> + + <h2><a name="6.2">6.2 <code>NMTOKENS</code> and <code>IDREFS</code> Parsers</a></h2> + + <p>The return type of the <code>nmtokens_pimpl</code> and + <code>idrefs_pimpl</code> parser implementations is + <code>xml_schema::string_sequence*</code>. + The returned object is dynamically allocated with operator + <code>new</code> and should eventually be deallocated with + operator <code>delete</code>. 
With STL and C++ exceptions enabled + (<a href="#5.1">Section 5.1, "Standard Template Library"</a>, + <a href="#5.3">Section 5.3, "C++ Exceptions"</a>), the + <code>string_sequence</code> type has the following interface:</p> + + <pre class="c++"> +namespace xml_schema +{ + class string_sequence + { + public: + typedef std::string value_type; + typedef std::string* pointer; + typedef const std::string* const_pointer; + typedef std::string& reference; + typedef const std::string& const_reference; + + typedef size_t size_type; + typedef ptrdiff_t difference_type; + + typedef std::string* iterator; + typedef const std::string* const_iterator; + + public: + string_sequence (); + + void + swap (string_sequence&); + + private: + string_sequence (string_sequence&); + + string_sequence& + operator= (string_sequence&); + + public: + iterator + begin (); + + const_iterator + begin () const; + + iterator + end (); + + const_iterator + end () const; + + std::string& + front (); + + const std::string& + front () const; + + std::string& + back (); + + const std::string& + back () const; + + std::string& + operator[] (size_t); + + const std::string& + operator[] (size_t) const; + + public: + bool + empty () const; + + size_t + size () const; + + size_t + capacity () const; + + size_t + max_size () const; + + public: + void + clear (); + + void + pop_back (); + + iterator + erase (iterator); + + void + push_back (const std::string&); + + iterator + insert (iterator, const std::string&); + + void + reserve (size_t); + }; + + bool + operator== (const string_sequence&, const string_sequence&); + + bool + operator!= (const string_sequence&, const string_sequence&); +} + </pre> + + <p>When STL is enabled and C++ exceptions are disabled, the signatures + of the <code>push_back()</code>, <code>insert()</code>, and + <code>reserve()</code> functions change as follows:</p> + + <pre class="c++"> +namespace xml_schema +{ + class string_sequence + { + public: + enum error + { + error_none, + 
error_no_memory + }; + + ... + + public: + error + push_back (const std::string&); + + error + insert (iterator, const std::string&); + + error + insert (iterator, const std::string&, iterator& result); + + error + reserve (size_t); + }; +} + </pre> + + <p>When STL is disabled and C++ exceptions are enabled, the + <code>string_sequence</code> type has the following interface:</p> + + <pre class="c++"> +namespace xml_schema +{ + class string_sequence + { + public: + typedef char* value_type; + typedef char** pointer; + typedef const char** const_pointer; + typedef char* reference; + typedef const char* const_reference; + + typedef size_t size_type; + typedef ptrdiff_t difference_type; + + typedef char** iterator; + typedef const char* const* const_iterator; + + string_sequence (); + + void + swap (string_sequence&); + + private: + string_sequence (string_sequence&); + + string_sequence& + operator= (string_sequence&); + + public: + iterator + begin (); + + const_iterator + begin () const; + + iterator + end (); + + const_iterator + end () const; + + char* + front (); + + const char* + front () const; + + char* + back (); + + const char* + back () const; + + char* + operator[] (size_t); + + const char* + operator[] (size_t) const; + + public: + bool + empty () const; + + size_t + size () const; + + size_t + capacity () const; + + size_t + max_size () const; + + public: + void + clear (); + + void + pop_back (); + + iterator + erase (iterator); + + void + push_back (char*); + + void + push_back_copy (const char*); + + iterator + insert (iterator, char*); + + void + reserve (size_t); + + // Detach a string from the sequence at a given position. + // The string pointer at this position in the sequence is + // set to 0. 
+ // + char* + detach (iterator); + }; + + bool + operator== (const string_sequence&, const string_sequence&); + + bool + operator!= (const string_sequence&, const string_sequence&); +} + </pre> + + <p>The <code>push_back()</code> and <code>insert()</code> functions + assume ownership of the passed string which should be allocated + with operator <code>new char[]</code> and will be deallocated + with operator <code>delete[]</code> by the <code>string_sequence</code> + object. These two functions free the passed object if the reallocation + of the underlying sequence buffer fails. The <code>push_back_copy()</code> + function makes a copy of the passed string. + If you detach the underlying element string, then it should + eventually be deallocated with operator <code>delete[]</code>.</p> + + <p>When both STL and C++ exceptions are disabled, the signatures + of the <code>push_back()</code>, <code>push_back_copy()</code>, + <code>insert()</code>, and <code>reserve()</code> functions change + as follows:</p> + + <pre class="c++"> +namespace xml_schema +{ + class string_sequence + { + public: + enum error + { + error_none, + error_no_memory + }; + + ... + + public: + error + push_back (char*); + + error + push_back_copy (const char*); + + error + insert (iterator, char*); + + error + insert (iterator, char*, iterator& result); + + error + reserve (size_t); + }; +} + </pre> + + + <h2><a name="6.3">6.3 <code>base64Binary</code> and <code>hexBinary</code> Parsers</a></h2> + + <p>The return type of the <code>base64_binary_pimpl</code> and + <code>hex_binary_pimpl</code> parser implementations is + <code>xml_schema::buffer*</code>. The returned object is + dynamically allocated with operator <code>new</code> and + should eventually be deallocated with operator <code>delete</code>. 
+ With C++ exceptions enabled (<a href="#5.3">Section 5.3, "C++ + Exceptions"</a>), the <code>buffer</code> type has the following + interface:</p> + + <pre class="c++"> +namespace xml_schema +{ + class buffer + { + public: + class bounds {}; // Out of bounds exception. + + public: + buffer (); + + explicit + buffer (size_t size); + buffer (size_t size, size_t capacity); + buffer (const void* data, size_t size); + buffer (const void* data, size_t size, size_t capacity); + + enum ownership_value { assume_ownership }; + + // This constructor assumes ownership of the memory passed. + // + buffer (void* data, size_t size, size_t capacity, ownership_value); + + private: + buffer (const buffer&); + + buffer& + operator= (const buffer&); + + public: + void + attach (void* data, size_t size, size_t capacity); + + void* + detach (); + + void + swap (buffer&); + + public: + size_t + capacity () const; + + bool + capacity (size_t); + + public: + size_t + size () const; + + bool + size (size_t); + + public: + const char* + data () const; + + char* + data (); + + const char* + begin () const; + + char* + begin (); + + const char* + end () const; + + char* + end (); + }; + + bool + operator== (const buffer&, const buffer&); + + bool + operator!= (const buffer&, const buffer&); +} + </pre> + + <p>The last constructor and the <code>attach()</code> member function + make the <code>buffer</code> instance assume the ownership of the + memory block pointed to by the <code>data</code> argument and + eventually release it by calling <code>operator delete()</code>. + The <code>detach()</code> member function detaches and returns the + underlying memory block which should eventually be released by + calling <code>operator delete()</code>. + </p> + + <p>The <code>capacity()</code> and <code>size()</code> modifier functions + return <code>true</code> if the underlying buffer has moved. 
The + <code>bounds</code> exception is thrown if the constructor or + <code>attach()</code> member function arguments violate the + <code>(size <= capacity)</code> constraint.</p> + + <p>If C++ exceptions are disabled, the <code>buffer</code> type has + the following interface:</p> + + <pre class="c++"> +namespace xml_schema +{ + class buffer + { + public: + enum error + { + error_none, + error_bounds, + error_no_memory + }; + + buffer (); + + private: + buffer (const buffer&); + + buffer& + operator= (const buffer&); + + public: + error + attach (void* data, size_t size, size_t capacity); + + void* + detach (); + + void + swap (buffer&); + + public: + size_t + capacity () const; + + error + capacity (size_t); + + error + capacity (size_t, bool& moved); + + public: + size_t + size () const; + + error + size (size_t); + + error + size (size_t, bool& moved); + + public: + const char* + data () const; + + char* + data (); + + const char* + begin () const; + + char* + begin (); + + const char* + end () const; + + char* + end (); + }; + + bool + operator== (const buffer&, const buffer&); + + bool + operator!= (const buffer&, const buffer&); +} + </pre> + + <h2><a name="6.4">6.4 Time Zone Representation</a></h2> + + <p>The <code>date</code>, <code>dateTime</code>, <code>gDay</code>, + <code>gMonth</code>, <code>gMonthDay</code>, <code>gYear</code>, + <code>gYearMonth</code>, and <code>time</code> XML Schema built-in + types all include an optional time zone component. 
The following + <code>xml_schema::time_zone</code> base class is used to represent + this information:</p> + + <pre class="c++"> +namespace xml_schema +{ + class time_zone + { + public: + time_zone (); + time_zone (short hours, short minutes); + + bool + zone_present () const; + + void + zone_reset (); + + short + zone_hours () const; + + void + zone_hours (short); + + short + zone_minutes () const; + + void + zone_minutes (short); + }; + + bool + operator== (const time_zone&, const time_zone&); + + bool + operator!= (const time_zone&, const time_zone&); +} + </pre> + + <p>The <code>zone_present()</code> accessor function returns <code>true</code> + if the time zone is specified. The <code>zone_reset()</code> modifier + function resets the time zone object to the <em>not specified</em> + state. If the time zone offset is negative then both hours and + minutes components are represented as negative integers.</p> + + <h2><a name="6.5">6.5 <code>date</code> Parser</a></h2> + + <p>The return type of the <code>date_pimpl</code> parser implementation + is <code>xml_schema::date</code> which represents a year, a day, and a month + with an optional time zone. Its interface is presented below. For + more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#6.4">Section 6.4, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class date: public time_zone + { + public: + // The default constructor creates an uninitialized object. + // Use modifiers to initialize it. 
+ // + date (); + + date (int year, unsigned short month, unsigned short day); + + date (int year, unsigned short month, unsigned short day, + short zone_hours, short zone_minutes); + + int + year () const; + + void + year (int); + + unsigned short + month () const; + + void + month (unsigned short); + + unsigned short + day () const; + + void + day (unsigned short); + }; + + bool + operator== (const date&, const date&); + + bool + operator!= (const date&, const date&); +} + </pre> + + <h2><a name="6.6">6.6 <code>dateTime</code> Parser</a></h2> + + <p>The return type of the <code>date_time_pimpl</code> parser implementation + is <code>xml_schema::date_time</code> which represents a year, a month, a day, + hours, minutes, and seconds with an optional time zone. Its interface + is presented below. For more information on the base + <code>xml_schema::time_zone</code> class refer to <a href="#6.4">Section + 6.4, "Time Zone Representation"</a>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class date_time: public time_zone + { + public: + // The default constructor creates an uninitialized object. + // Use modifiers to initialize it. 
+ // + date_time (); + + date_time (int year, unsigned short month, unsigned short day, + unsigned short hours, unsigned short minutes, + double seconds); + + date_time (int year, unsigned short month, unsigned short day, + unsigned short hours, unsigned short minutes, + double seconds, short zone_hours, short zone_minutes); + + int + year () const; + + void + year (int); + + unsigned short + month () const; + + void + month (unsigned short); + + unsigned short + day () const; + + void + day (unsigned short); + + unsigned short + hours () const; + + void + hours (unsigned short); + + unsigned short + minutes () const; + + void + minutes (unsigned short); + + double + seconds () const; + + void + seconds (double); + }; + + bool + operator== (const date_time&, const date_time&); + + bool + operator!= (const date_time&, const date_time&); +} + </pre> + + <h2><a name="6.7">6.7 <code>duration</code> Parser</a></h2> + + <p>The return type of the <code>duration_pimpl</code> parser implementation + is <code>xml_schema::duration</code> which represents a potentially + negative duration in the form of years, months, days, hours, minutes, + and seconds. Its interface is presented below.</p> + + <pre class="c++"> +namespace xml_schema +{ + class duration + { + public: + // The default constructor creates an uninitialized object. + // Use modifiers to initialize it. 
+ // + duration (); + + duration (bool negative, + unsigned int years, unsigned int months, unsigned int days, + unsigned int hours, unsigned int minutes, double seconds); + + bool + negative () const; + + void + negative (bool); + + unsigned int + years () const; + + void + years (unsigned int); + + unsigned int + months () const; + + void + months (unsigned int); + + unsigned int + days () const; + + void + days (unsigned int); + + unsigned int + hours () const; + + void + hours (unsigned int); + + unsigned int + minutes () const; + + void + minutes (unsigned int); + + double + seconds () const; + + void + seconds (double); + }; + + bool + operator== (const duration&, const duration&); + + bool + operator!= (const duration&, const duration&); +} + </pre> + + + <h2><a name="6.8">6.8 <code>gDay</code> Parser</a></h2> + + <p>The return type of the <code>gday_pimpl</code> parser implementation + is <code>xml_schema::gday</code> which represents a day of the month with + an optional time zone. Its interface is presented below. For + more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#6.4">Section 6.4, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class gday: public time_zone + { + public: + // The default constructor creates an uninitialized object. + // Use modifiers to initialize it. + // + gday (); + + explicit + gday (unsigned short day); + + gday (unsigned short day, short zone_hours, short zone_minutes); + + unsigned short + day () const; + + void + day (unsigned short); + }; + + bool + operator== (const gday&, const gday&); + + bool + operator!= (const gday&, const gday&); +} + </pre> + + <h2><a name="6.9">6.9 <code>gMonth</code> Parser</a></h2> + + <p>The return type of the <code>gmonth_pimpl</code> parser implementation + is <code>xml_schema::gmonth</code> which represents a month of the year + with an optional time zone. Its interface is presented below. 
For + more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#6.4">Section 6.4, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class gmonth: public time_zone + { + public: + // The default constructor creates an uninitialized object. + // Use modifiers to initialize it. + // + gmonth (); + + explicit + gmonth (unsigned short month); + + gmonth (unsigned short month, + short zone_hours, short zone_minutes); + + unsigned short + month () const; + + void + month (unsigned short); + }; + + bool + operator== (const gmonth&, const gmonth&); + + bool + operator!= (const gmonth&, const gmonth&); +} + </pre> + + <h2><a name="6.10">6.10 <code>gMonthDay</code> Parser</a></h2> + + <p>The return type of the <code>gmonth_day_pimpl</code> parser implementation + is <code>xml_schema::gmonth_day</code> which represents a day and a month of + the year with an optional time zone. Its interface is presented below. + For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#6.4">Section 6.4, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class gmonth_day: public time_zone + { + public: + // The default constructor creates an uninitialized object. + // Use modifiers to initialize it. 
+ // + gmonth_day (); + + gmonth_day (unsigned short month, unsigned short day); + + gmonth_day (unsigned short month, unsigned short day, + short zone_hours, short zone_minutes); + + unsigned short + month () const; + + void + month (unsigned short); + + unsigned short + day () const; + + void + day (unsigned short); + }; + + bool + operator== (const gmonth_day&, const gmonth_day&); + + bool + operator!= (const gmonth_day&, const gmonth_day&); +} + </pre> + + <h2><a name="6.11">6.11 <code>gYear</code> Parser</a></h2> + + <p>The return type of the <code>gyear_pimpl</code> parser implementation + is <code>xml_schema::gyear</code> which represents a year with + an optional time zone. Its interface is presented below. + For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#6.4">Section 6.4, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class gyear: public time_zone + { + public: + // The default constructor creates an uninitialized object. + // Use modifiers to initialize it. + // + gyear (); + + explicit + gyear (int year); + + gyear (int year, short zone_hours, short zone_minutes); + + int + year () const; + + void + year (int); + }; + + bool + operator== (const gyear&, const gyear&); + + bool + operator!= (const gyear&, const gyear&); +} + </pre> + + <h2><a name="6.12">6.12 <code>gYearMonth</code> Parser</a></h2> + + <p>The return type of the <code>gyear_month_pimpl</code> parser implementation + is <code>xml_schema::gyear_month</code> which represents a year and a month + with an optional time zone. Its interface is presented below. + For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#6.4">Section 6.4, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class gyear_month: public time_zone + { + public: + // The default constructor creates an uninitialized object. + // Use modifiers to initialize it. 
+ // + gyear_month (); + + gyear_month (int year, unsigned short month); + + gyear_month (int year, unsigned short month, + short zone_hours, short zone_minutes); + + int + year () const; + + void + year (int); + + unsigned short + month () const; + + void + month (unsigned short); + }; + + bool + operator== (const gyear_month&, const gyear_month&); + + bool + operator!= (const gyear_month&, const gyear_month&); +} + </pre> + + + <h2><a name="6.13">6.13 <code>time</code> Parser</a></h2> + + <p>The return type of the <code>time_pimpl</code> parser implementation + is <code>xml_schema::time</code> which represents hours, minutes, + and seconds with an optional time zone. Its interface is presented below. + For more information on the base <code>xml_schema::time_zone</code> + class refer to <a href="#6.4">Section 6.4, "Time Zone + Representation"</a>.</p> + + <pre class="c++"> +namespace xml_schema +{ + class time: public time_zone + { + public: + // The default constructor creates an uninitialized object. + // Use modifiers to initialize it. 
+ // + time (); + + time (unsigned short hours, unsigned short minutes, double seconds); + + time (unsigned short hours, unsigned short minutes, double seconds, + short zone_hours, short zone_minutes); + + unsigned short + hours () const; + + void + hours (unsigned short); + + unsigned short + minutes () const; + + void + minutes (unsigned short); + + double + seconds () const; + + void + seconds (double); + }; + + bool + operator== (const time&, const time&); + + bool + operator!= (const time&, const time&); +} + </pre> + + + <!-- Error Handling --> + + + <h1><a name="7">7 Document Parser and Error Handling</a></h1> + + <p>In this chapter we will discuss the <code>xml_schema::document_pimpl</code> + type, the error handling mechanisms provided by the mapping, as well + as how to reuse a parser after an error has occurred.</p> + + <p>There are four categories of errors that can result from running + a parser on an XML instance: system, xml, schema, and application. + The system category contains memory allocation and file/stream + operation errors. The xml category is for XML parsing and + well-formedness checking errors. Similarly, the schema category is + for XML Schema validation errors. Finally, the application category + is for application logic errors that you may want to propagate + from parser implementations to the caller of the parser. + </p> + + <p>The C++/Parser mapping supports two methods of reporting errors: + using C++ exceptions and with error codes. The method used depends + on whether or not you have configured the XSD/e runtime and + the generated code with C++ exceptions enabled, as described + in <a href="#5.3">Section 5.3, "C++ Exceptions"</a>.</p> + + <h2><a name="7.1">7.1 Document Parser</a></h2> + + <p>The <code>xml_schema::document_pimpl</code> parser is a root parser for + the vocabulary. 
As mentioned in <a href="#3.4">Section 3.4, + "Connecting the Parsers Together"</a>, its interface varies depending + on the mapping configuration (<a href="#5">Chapter 5, "Mapping + Configuration"</a>). When STL and the iostream library are + enabled, the <code>xml_schema::document_pimpl</code> class has the + following interface:</p> + + <pre class="c++"> +namespace xml_schema +{ + class parser_base; + + class document_pimpl + { + public: + document_pimpl (parser_base&, + const char* root_element_name); + + document_pimpl (parser_base&, + const char* root_element_namespace, + const char* root_element_name); + + document_pimpl (parser_base&, + const std::string& root_element_name); + + document_pimpl (parser_base&, + const std::string& root_element_namespace, + const std::string& root_element_name); + + + public: + // Parse a local file. The file is accessed with std::ifstream + // in binary mode. The std::ios_base::failure exception is used + // to report io errors (badbit and failbit) if exceptions are + // enabled. Otherwise error codes are used. + // + void + parse (const char* file); + + void + parse (const std::string& file); + + // Parse std::istream. std::ios_base::failure exception is used + // to report io errors (badbit and failbit) if exceptions are + // enabled. Otherwise error codes are used. + // + void + parse (std::istream&); + + // Parse a chunk of input. You can call this function multiple + // times with the last call having the last argument true. + // + void + parse (const void* data, size_t size, bool last); + + // Low-level Expat-specific parsing API. + // + void + parse_begin (XML_Parser); + + void + parse_end (); + }; +} + </pre> + + <p>When the use of STL is disabled, the constructors and the <code>parse()</code> + function that use <code>std::string</code> in their signatures + are not available. 
When the use of iostream is disabled, the + <code>parse()</code> functions that parse a local file and + <code>std::istream</code> are not available.</p> + + <p>When support for XML Schema polymorphism is enabled, the + overloaded <code>document_pimpl</code> constructors have + additional arguments which control polymorphic parsing. + For more information refer to <a href="#5.7">Section 5.7, + "Support for Polymorphism"</a>. + </p> + + <p>The first argument to all overloaded constructors is the + parser for the type of the root element. The <code>parser_base</code> + class is the base type for all parser skeletons. The remaining + arguments to the <code>document_pimpl</code> constructors are the + root element's name, optionally preceded by its namespace.</p> + + <p>The <code>parse_begin()</code> and <code>parse_end()</code> functions + present a low-level, Expat-specific parsing API for maximum control. + A typical use case would look like this (pseudo-code):</p> + + <pre class="c++"> +xxx_pimpl root_p; +document_pimpl doc_p (root_p, "root"); + +root_p.pre (); +doc_p.parse_begin (xml_parser); + +while (more_stuff_to_parse) +{ + // Call XML_Parse or XML_ParseBuffer: + // + if (XML_Parse (...) == XML_STATUS_ERROR) + break; +} + +doc_p.parse_end (); +result_type result (root_p.post_xxx ()); + </pre> + + <p>Note that if your vocabulary uses XML namespaces, the + <code>XML_ParserCreateNS()</code> function should be used to create + the XML parser. Space (<code>XML_Char (' ')</code>) should be used + as a separator (the second argument to <code>XML_ParserCreateNS()</code>). + Furthermore, if <code>XML_Parse()</code> or <code>XML_ParseBuffer()</code> + fails, call <code>parse_end()</code> to determine the error, which is + reported either by throwing an exception or by setting an error code. 
+ </p> + + <p>The error handling mechanisms employed by the <code>document_pimpl</code> + parser are described in <a href="#7.2">Section 7.2, "Exceptions"</a> + and <a href="#7.3">Section 7.3, "Error Codes"</a>.</p> + + <h2><a name="7.2">7.2 Exceptions</a></h2> + + <p>When C++ exceptions are used for error reporting, the system + errors are mapped to the standard exceptions. The out of memory + condition is indicated by throwing an instance + of <code>std::bad_alloc</code>. The stream operation errors + are reported by throwing an instance of + <code>std::ios_base::failure</code>.</p> + + <p>The xml and schema errors are reported by throwing the + <code>xml_schema::parser_xml</code> and <code>xml_schema::parser_schema</code> + exceptions, respectively. These two exceptions derive from + <code>xml_schema::parser_exception</code> which, in turn, derives + from <code>std::exception</code>. As a result, you can handle + any error from these two categories by either catching + <code>std::exception</code>, <code>xml_schema::parser_exception</code>, + or individual exceptions. The further down the hierarchy you go + the more detailed error information is available to you. The + following listing shows the definitions of these exceptions:</p> + + <pre class="c++"> +namespace xml_schema +{ + class parser_exception: public std::exception + { + public: + unsigned long + line () const; + + unsigned long + column () const; + + virtual const char* + text () const = 0; + + ... + }; + + std::ostream& + operator<< (std::ostream&, const parser_exception&); + + + typedef <implementation-details> parser_xml_error; + + class parser_xml: public parser_exception + { + public: + parser_xml_error + code () const; + + virtual const char* + text () const; + + virtual const char* + what () const throw (); + + ... 
+ }; + + + typedef <implementation-details> parser_schema_error; + + class parser_schema: public parser_exception + { + public: + parser_schema_error + code () const; + + virtual const char* + text () const; + + virtual const char* + what () const throw (); + + ... + }; +} + </pre> + + <p>The <code>parser_xml_error</code> and <code>parser_schema_error</code> + are implementation-specific error code types. The + <code>operator<<</code> defined for the <code>parser_exception</code> + class simply prints the error description as returned by the + <code>text()</code> function. The following example shows + how we can catch these exceptions:</p> + + <pre class="c++"> +int +main (int argc, char* argv[]) +{ + try + { + // Parse argv[1]. + } + catch (const xml_schema::parser_exception& e) + { + cout << argv[1] << ":" << e.line () << ":" << e.column () + << ": error: " << e.text () << endl; + return 1; + } +} + </pre> + + <p>Finally, for reporting application errors from parsing callbacks, you + can throw any exceptions of your choice. They are propagated to + the caller of the parser without any alterations.</p> + + <h2><a name="7.3">7.3 Error Codes</a></h2> + + <p>When C++ exceptions are not available, error codes are used to + report error conditions. Each parser skeleton and the root + <code>document_pimpl</code> parser have the following member + function for querying + the error status:</p> + + <pre class="c++"> +xml_schema::parser_error +_error () const; + </pre> + + <p>To handle all possible error conditions, you will need to obtain + the error status after calls to: the <code>document_pimpl</code>'s + constructor (it performs memory allocations which may fail), the + root parser <code>pre()</code> callback, each call to the <code>parse()</code> + function, and, finally, the call to the root parser + <code>post_*()</code> callback. 
The definition of the + <code>xml_schema::parser_error</code> class is presented below:</p> + + <pre class="c++"> +namespace xml_schema +{ + class sys_error + { + public: + enum value + { + none, + no_memory, + open_failed, + read_failed, + write_failed + }; + + sys_error (value); + + operator value () const; + + static const char* + text (value); + + ... + }; + + typedef <implementation-details> parser_xml_error; + typedef <implementation-details> parser_schema_error; + + class parser_error + { + public: + enum error_type + { + none, + sys, + xml, + schema, + app + }; + + error_type + type () const; + + // Line and column are only available for xml, schema, and + // app errors. + // + unsigned long + line () const; + + unsigned long + column () const; + + // Returns true if there is an error so that you can write + // if (p._error ()) or if (parser_error e = p._error ()). + // + typedef void (parser_error::*bool_convertible) (); + operator bool_convertible () const; + + // system + // + sys_error + sys_code () const; + + const char* + sys_text () const; + + // xml + // + parser_xml_error + xml_code () const; + + const char* + xml_text () const; + + // schema + // + parser_schema_error + schema_code () const; + + const char* + schema_text () const; + + // app + // + int + app_code () const; + + ... + }; +} + </pre> + + <p>The <code>parser_xml_error</code> and <code>parser_schema_error</code> + are implementation-specific error code types. The + <code>parser_error</code> class incorporates four categories of errors + which you can query by calling the <code>type()</code> function. + The following example shows how to handle error conditions with + error codes. It is based on the person record example presented + in <a href="#3">Chapter 3, "Parser Skeletons"</a>.</p> + + <pre class="c++"> +int +main (int argc, char* argv[]) +{ + // Construct the parser. 
+ // + xml_schema::short_pimpl short_p; + xml_schema::string_pimpl string_p; + + gender_pimpl gender_p; + person_pimpl person_p; + people_pimpl people_p; + + person_p.parsers (string_p, string_p, gender_p, short_p); + people_p.parsers (person_p); + + // Parse. + // + using xml_schema::parser_error; + parser_error e; + + do + { + xml_schema::document_pimpl doc_p (people_p, "people"); + if (e = doc_p._error ()) + break; + + people_p.pre (); + if (e = people_p._error ()) + break; + + doc_p.parse (argv[1]); + if (e = doc_p._error ()) + break; + + people_p.post_people (); + e = people_p._error (); + + } while (false); + + // Handle errors. + // + if (e) + { + switch (e.type ()) + { + case parser_error::sys: + { + cerr << argv[1] << ": error: " << e.sys_text () << endl; + break; + } + case parser_error::xml: + { + cerr << argv[1] << ":" << e.line () << ":" << e.column () + << ": error: " << e.xml_text () << endl; + break; + } + case parser_error::schema: + { + cerr << argv[1] << ":" << e.line () << ":" << e.column () + << ": error: " << e.schema_text () << endl; + break; + } + case parser_error::app: + { + cerr << argv[1] << ":" << e.line () << ":" << e.column () + << ": application error " << e.app_code () << endl; + break; + } + } + return 1; + } +} + </pre> + + <p>The error type for application errors is <code>int</code> with + the value <code>0</code> indicating the absence of an error. You can + set the application error by calling the <code>_app_error()</code> + function inside a parser callback. For example, if it was invalid to + have a person younger than 18 in our people catalog, then we could + have implemented this check as follows: </p> + + <pre class="c++"> +class person_pimpl: public person_pskel +{ +public: + virtual void + age (short a) + { + if (a < 18) + _app_error (1); + } + + ... +}; + </pre> + + <p>You can also set a system error by calling the <code>_sys_error()</code> + function inside a parser callback. 
This function has one argument of type + <code>xml_schema::sys_error</code>, which was presented above. For + example:</p> + + <pre class="c++"> +class person_pimpl: public person_pskel +{ +public: + virtual void + pre () + { + p_ = new person (); + + if (p_ == 0) + _sys_error (xml_schema::sys_error::no_memory); + } + + ... + +private: + person* p_; +}; + </pre> + + + <h2><a name="7.4">7.4 Reusing Parsers after an Error</a></h2> + + <p>After a successful execution, a parser returns to its initial + state and can be used to parse another document without any + extra actions. On the other hand, if an error occurred during + parsing and you would like to reuse the parser to parse another + document, you need to explicitly reset it to its initial + state, as shown in the following code fragment:</p> + + <pre class="c++"> +int +main () +{ + ... + + std::vector<std::string> files = ... + + xml_schema::document_pimpl doc_p (people_p, "people"); + + for (size_t i = 0; i < files.size (); ++i) + { + try + { + people_p.pre (); + doc_p.parse (files[i]); + people_p.post_people (); + } + catch (const xml_schema::parser_exception&) + { + doc_p.reset (); + } + } +} + </pre> + + <p>If you do not need to reuse parsers after an error, for example + because your application terminates or you create a new parser + instance in such situations, then you can avoid generating + parser reset code by specifying the <code>--suppress-reset</code> + XSD/e compiler option.</p> + + <p>Your individual parser implementations may also require extra + actions in order to bring them into a usable state after an + error. To accomplish this you can override the <code>_reset()</code> + virtual function as shown below. 
Notice that when you override the + <code>_reset()</code> function in your implementation, you should + always call the base skeleton version to allow it to reset + its state:</p> + +<pre class="c++"> +class person_pimpl: public person_pskel +{ +public: + virtual void + pre () + { + p_ = new person (); + } + + virtual void + _reset () + { + person_pskel::_reset (); + delete p_; + p_ = 0; + } + + ... + +private: + person* p_; +}; + </pre> + + <p>Note also that the <code>_reset()</code> mechanism is used only when + an error has occurred. To make sure that your parser implementations + arrive at the initial state during successful execution, use the + initialization (<code>pre()</code> and <code>_pre()</code>) and + finalization (<code>post_*()</code> and <code>_post()</code>) + callbacks.</p> + + <!-- Appendix A --> + + + <h1><a name="A">Appendix A — Supported XML Schema Constructs</a></h1> + + <p>The Embedded C++/Parser mapping supports validation of the following + W3C XML Schema constructs in the generated code.</p> + + <!-- border="1" is necessary for html2ps --> + <table id="features" border="1"> + <tr><th>Construct</th><th>Notes</th></tr> + <tr><th colspan="2">Structure</th></tr> + + <tr><td>element</td><td></td></tr> + <tr><td>attribute</td><td></td></tr> + + <tr><td>any</td><td></td></tr> + <tr><td>anyAttribute</td><td></td></tr> + + <tr><td>all</td><td></td></tr> + <tr><td>sequence</td><td></td></tr> + <tr><td>choice</td><td></td></tr> + + <tr><td>complex type, empty content</td><td></td></tr> + <tr><td>complex type, mixed content</td><td></td></tr> + <tr><td>complex type, simple content extension</td><td></td></tr> + <tr><td>complex type, simple content restriction</td> + <td>Simple type facets are not validated.</td></tr> + <tr><td>complex type, complex content extension</td><td></td></tr> + <tr><td>complex type, complex content restriction</td><td></td></tr> + + <tr><td>list</td><td></td></tr> + + <tr><th colspan="2">Datatypes</th></tr> + + 
<tr><td>byte</td><td></td></tr> + <tr><td>unsignedByte</td><td></td></tr> + <tr><td>short</td><td></td></tr> + <tr><td>unsignedShort</td><td></td></tr> + <tr><td>int</td><td></td></tr> + <tr><td>unsignedInt</td><td></td></tr> + <tr><td>long</td><td></td></tr> + <tr><td>unsignedLong</td><td></td></tr> + <tr><td>integer</td><td></td></tr> + <tr><td>nonPositiveInteger</td><td></td></tr> + <tr><td>nonNegativeInteger</td><td></td></tr> + <tr><td>positiveInteger</td><td></td></tr> + <tr><td>negativeInteger</td><td></td></tr> + + <tr><td>boolean</td><td></td></tr> + + <tr><td>float</td><td></td></tr> + <tr><td>double</td><td></td></tr> + <tr><td>decimal</td><td></td></tr> + + <tr><td>string</td><td></td></tr> + <tr><td>normalizedString</td><td></td></tr> + <tr><td>token</td><td></td></tr> + <tr><td>Name</td><td></td></tr> + <tr><td>NMTOKEN</td><td></td></tr> + <tr><td>NCName</td><td></td></tr> + <tr><td>language</td><td></td></tr> + <tr><td>anyURI</td><td></td></tr> + + <tr><td>ID</td><td>Identity constraint is not enforced.</td></tr> + <tr><td>IDREF</td><td>Identity constraint is not enforced.</td></tr> + + <tr><td>NMTOKENS</td><td></td></tr> + <tr><td>IDREFS</td><td>Identity constraint is not enforced.</td></tr> + + <tr><td>QName</td><td></td></tr> + + <tr><td>base64Binary</td><td></td></tr> + <tr><td>hexBinary</td><td></td></tr> + + <tr><td>date</td><td></td></tr> + <tr><td>dateTime</td><td></td></tr> + <tr><td>duration</td><td></td></tr> + <tr><td>gDay</td><td></td></tr> + <tr><td>gMonth</td><td></td></tr> + <tr><td>gMonthDay</td><td></td></tr> + <tr><td>gYear</td><td></td></tr> + <tr><td>gYearMonth</td><td></td></tr> + <tr><td>time</td><td></td></tr> + </table> + + </div> +</div> + + +</body> +</html> |