summary refs log tree commit diff
diff options
context:
space:
mode:
author Boris Kolpackov <boris@codesynthesis.com> 2014-07-04 11:15:44 +0200
committer Boris Kolpackov <boris@codesynthesis.com> 2014-07-04 11:15:44 +0200
commit 57fe13de5c01bc8e5bbeb39acd131c9329245261 (patch)
tree 1196307d17664b97af750778488e4798d0dbf8dc
parent a1bc52f9ae499a672b05b3264f82a24637a16a02 (diff)
Update streaming example with new implementation
-rw-r--r-- dist/examples/cxx/tree/streaming/makefile 5
-rw-r--r-- dist/examples/cxx/tree/streaming/streaming-vc10.vcxproj 4
-rw-r--r-- dist/examples/cxx/tree/streaming/streaming-vc10.vcxproj.filters 8
-rw-r--r-- dist/examples/cxx/tree/streaming/streaming-vc11.vcxproj 4
-rw-r--r-- dist/examples/cxx/tree/streaming/streaming-vc11.vcxproj.filters 8
-rw-r--r-- dist/examples/cxx/tree/streaming/streaming-vc12.vcxproj 4
-rw-r--r-- dist/examples/cxx/tree/streaming/streaming-vc12.vcxproj.filters 8
-rw-r--r-- dist/examples/cxx/tree/streaming/streaming-vc8.vcproj 6
-rw-r--r-- dist/examples/cxx/tree/streaming/streaming-vc9.vcproj 6
-rw-r--r-- examples/cxx/tree/streaming/README 23
-rw-r--r-- examples/cxx/tree/streaming/driver.cxx 166
-rw-r--r-- examples/cxx/tree/streaming/grammar-input-stream.cxx 96
-rw-r--r-- examples/cxx/tree/streaming/grammar-input-stream.hxx 41
-rw-r--r-- examples/cxx/tree/streaming/makefile 6
-rw-r--r-- examples/cxx/tree/streaming/parser.cxx 169
-rw-r--r-- examples/cxx/tree/streaming/parser.hxx 46
-rw-r--r-- examples/cxx/tree/streaming/serializer.cxx 492
-rw-r--r-- examples/cxx/tree/streaming/serializer.hxx 91
18 files changed, 974 insertions, 209 deletions
diff --git a/dist/examples/cxx/tree/streaming/makefile b/dist/examples/cxx/tree/streaming/makefile
index ed32b83..4c7acd2 100644
--- a/dist/examples/cxx/tree/streaming/makefile
+++ b/dist/examples/cxx/tree/streaming/makefile
@@ -9,12 +9,13 @@ override XSDFLAGS += --generate-serialization
# Build.
#
-driver: driver.o parser.o serializer.o position.o
+driver: driver.o parser.o serializer.o grammar-input-stream.o position.o
position.o: position.cxx position.hxx
driver.o: driver.cxx position.hxx parser.hxx serializer.hxx
parser.o: parser.cxx parser.hxx
serializer.o: serializer.cxx serializer.hxx
+grammar-input-stream.o: grammar-input-stream.cxx grammar-input-stream.hxx
position.cxx position.hxx: position.xsd
@@ -30,4 +31,4 @@ test: driver position.xml
#
.PHONY: clean
clean:
- rm -f position.o position.?xx parser.o serializer.o driver.o driver out.xml
+ rm -f position.o position.?xx parser.o serializer.o grammar-input-stream.o driver.o driver out.xml
diff --git a/dist/examples/cxx/tree/streaming/streaming-vc10.vcxproj b/dist/examples/cxx/tree/streaming/streaming-vc10.vcxproj
index fb74ff1..627ddcc 100644
--- a/dist/examples/cxx/tree/streaming/streaming-vc10.vcxproj
+++ b/dist/examples/cxx/tree/streaming/streaming-vc10.vcxproj
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="utf-8"?>
+<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
@@ -182,11 +182,13 @@
<ClCompile Include="position.cxx" />
<ClCompile Include="parser.cxx" />
<ClCompile Include="serializer.cxx" />
+ <ClCompile Include="grammar-input-stream.cxx" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="position.hxx" />
<ClInclude Include="parser.hxx" />
<ClInclude Include="serializer.hxx" />
+ <ClInclude Include="grammar-input-stream.hxx" />
</ItemGroup>
<ItemGroup>
<CustomBuild Include="position.xsd">
diff --git a/dist/examples/cxx/tree/streaming/streaming-vc10.vcxproj.filters b/dist/examples/cxx/tree/streaming/streaming-vc10.vcxproj.filters
index cf8b190..3597703 100644
--- a/dist/examples/cxx/tree/streaming/streaming-vc10.vcxproj.filters
+++ b/dist/examples/cxx/tree/streaming/streaming-vc10.vcxproj.filters
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="utf-8"?>
+<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
@@ -27,6 +27,9 @@
<ClCompile Include="serializer.cxx">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="grammar-input-stream.cxx">
+ <Filter>Source Files</Filter>
+ </ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="position.hxx">
@@ -38,6 +41,9 @@
<ClInclude Include="serializer.hxx">
<Filter>Header Files</Filter>
</ClInclude>
+ <ClInclude Include="grammar-input-stream.hxx">
+ <Filter>Header Files</Filter>
+ </ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="Readme.txt" />
diff --git a/dist/examples/cxx/tree/streaming/streaming-vc11.vcxproj b/dist/examples/cxx/tree/streaming/streaming-vc11.vcxproj
index cb16610..01ce1db 100644
--- a/dist/examples/cxx/tree/streaming/streaming-vc11.vcxproj
+++ b/dist/examples/cxx/tree/streaming/streaming-vc11.vcxproj
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="utf-8"?>
+<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
@@ -186,11 +186,13 @@
<ClCompile Include="position.cxx" />
<ClCompile Include="parser.cxx" />
<ClCompile Include="serializer.cxx" />
+ <ClCompile Include="grammar-input-stream.cxx" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="position.hxx" />
<ClInclude Include="parser.hxx" />
<ClInclude Include="serializer.hxx" />
+ <ClInclude Include="grammar-input-stream.hxx" />
</ItemGroup>
<ItemGroup>
<CustomBuild Include="position.xsd">
diff --git a/dist/examples/cxx/tree/streaming/streaming-vc11.vcxproj.filters b/dist/examples/cxx/tree/streaming/streaming-vc11.vcxproj.filters
index cf8b190..3597703 100644
--- a/dist/examples/cxx/tree/streaming/streaming-vc11.vcxproj.filters
+++ b/dist/examples/cxx/tree/streaming/streaming-vc11.vcxproj.filters
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="utf-8"?>
+<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
@@ -27,6 +27,9 @@
<ClCompile Include="serializer.cxx">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="grammar-input-stream.cxx">
+ <Filter>Source Files</Filter>
+ </ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="position.hxx">
@@ -38,6 +41,9 @@
<ClInclude Include="serializer.hxx">
<Filter>Header Files</Filter>
</ClInclude>
+ <ClInclude Include="grammar-input-stream.hxx">
+ <Filter>Header Files</Filter>
+ </ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="Readme.txt" />
diff --git a/dist/examples/cxx/tree/streaming/streaming-vc12.vcxproj b/dist/examples/cxx/tree/streaming/streaming-vc12.vcxproj
index 6511fa9..6dba59e 100644
--- a/dist/examples/cxx/tree/streaming/streaming-vc12.vcxproj
+++ b/dist/examples/cxx/tree/streaming/streaming-vc12.vcxproj
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="utf-8"?>
+<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
@@ -190,11 +190,13 @@
<ClCompile Include="position.cxx" />
<ClCompile Include="parser.cxx" />
<ClCompile Include="serializer.cxx" />
+ <ClCompile Include="grammar-input-stream.cxx" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="position.hxx" />
<ClInclude Include="parser.hxx" />
<ClInclude Include="serializer.hxx" />
+ <ClInclude Include="grammar-input-stream.hxx" />
</ItemGroup>
<ItemGroup>
<CustomBuild Include="position.xsd">
diff --git a/dist/examples/cxx/tree/streaming/streaming-vc12.vcxproj.filters b/dist/examples/cxx/tree/streaming/streaming-vc12.vcxproj.filters
index cf8b190..3597703 100644
--- a/dist/examples/cxx/tree/streaming/streaming-vc12.vcxproj.filters
+++ b/dist/examples/cxx/tree/streaming/streaming-vc12.vcxproj.filters
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="utf-8"?>
+<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
@@ -27,6 +27,9 @@
<ClCompile Include="serializer.cxx">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="grammar-input-stream.cxx">
+ <Filter>Source Files</Filter>
+ </ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="position.hxx">
@@ -38,6 +41,9 @@
<ClInclude Include="serializer.hxx">
<Filter>Header Files</Filter>
</ClInclude>
+ <ClInclude Include="grammar-input-stream.hxx">
+ <Filter>Header Files</Filter>
+ </ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="Readme.txt" />
diff --git a/dist/examples/cxx/tree/streaming/streaming-vc8.vcproj b/dist/examples/cxx/tree/streaming/streaming-vc8.vcproj
index b7f65d0..27f6233 100644
--- a/dist/examples/cxx/tree/streaming/streaming-vc8.vcproj
+++ b/dist/examples/cxx/tree/streaming/streaming-vc8.vcproj
@@ -359,6 +359,9 @@
<File
RelativePath=".\serializer.cxx">
</File>
+ <File
+ RelativePath=".\grammar-input-stream.cxx">
+ </File>
</Filter>
<Filter
Name="Header Files"
@@ -375,6 +378,9 @@
<File
RelativePath=".\serializer.hxx">
</File>
+ <File
+ RelativePath=".\grammar-input-stream.hxx">
+ </File>
</Filter>
<Filter
Name="Schema Files"
diff --git a/dist/examples/cxx/tree/streaming/streaming-vc9.vcproj b/dist/examples/cxx/tree/streaming/streaming-vc9.vcproj
index cbfec59..8dd3f2e 100644
--- a/dist/examples/cxx/tree/streaming/streaming-vc9.vcproj
+++ b/dist/examples/cxx/tree/streaming/streaming-vc9.vcproj
@@ -356,6 +356,9 @@
<File
RelativePath=".\serializer.cxx">
</File>
+ <File
+ RelativePath=".\grammar-input-stream.cxx">
+ </File>
</Filter>
<Filter
Name="Header Files"
@@ -372,6 +375,9 @@
<File
RelativePath=".\serializer.hxx">
</File>
+ <File
+ RelativePath=".\grammar-input-stream.hxx">
+ </File>
</Filter>
<Filter
Name="Schema Files"
diff --git a/examples/cxx/tree/streaming/README b/examples/cxx/tree/streaming/README
index ac7e7f0..5a467e0 100644
--- a/examples/cxx/tree/streaming/README
+++ b/examples/cxx/tree/streaming/README
@@ -1,5 +1,5 @@
-This example shows how to perform stream-oriented, partially in-memory
-XML processing using the C++/Tree mapping. With the partially in-memory
+This example shows how to perform stream-oriented, partially in-memory
+XML processing using the C++/Tree mapping. With the partially in-memory
parsing and serialization only a part of the object model is in memory at
any given time. With this approach we can process parts of the document
as they become available as well as handle documents that are too large
@@ -17,7 +17,7 @@ position.xml
position.hxx
position.cxx
- C++ types that represent the position vocabulary as well as parsing
+ C++ types that represent the position vocabulary as well as parsing
and serialization functions. These are generated by XSD from
position.xsd.
@@ -29,15 +29,20 @@ parser.cxx
serializer.hxx
serializer.cxx
- Stream-oriented DOM serializer implementation that allows us to
+ Stream-oriented DOM serializer implementation that allows us to
serialize an XML Document as a series of object model fragments.
+grammar-input-stream.hxx
+grammar-input-stream.cxx
+ Input stream implementation with the special-purpose schema grammar
+ decompression algorithm. It is used internally by the streaming parser.
+
driver.cxx
- Driver for the example. It first parses the input file into a series
- of DOM fragments which are then parsed into the object model fragments.
- The driver prints the information from the document as it becomes
- available. The driver then creates a new XML document (out.xml) by
- creating and serializing a series of object model fragments.
+ Driver for the example. It parses the input file into a series of DOM
+ fragments which are then parsed into the object model fragments. The
+ driver prints the information from the document as it becomes available.
+ It also serializes the object model fragments into a new XML document
+ (out.xml).
To run the example simply execute:
diff --git a/examples/cxx/tree/streaming/driver.cxx b/examples/cxx/tree/streaming/driver.cxx
index 6cc2cd4..eb7026a 100644
--- a/examples/cxx/tree/streaming/driver.cxx
+++ b/examples/cxx/tree/streaming/driver.cxx
@@ -15,9 +15,6 @@
using namespace std;
using namespace xercesc;
-static void
-measure_position (unsigned int n, float& lat, float& lon);
-
int
main (int argc, char* argv[])
{
@@ -39,96 +36,92 @@ main (int argc, char* argv[])
using namespace op;
namespace xml = xsd::cxx::xml;
- // Parse.
+ // Parse and serialize at the same time, in the streaming mode.
//
ifstream ifs;
ifs.exceptions (ifstream::badbit | ifstream::failbit);
ifs.open (argv[1]);
- parser p;
-
- // The first document we get is the "carcase" of the complete document.
- // That is, the root element with all the attributes but without any
- // content. We may need it to get to the attributes in the root element.
- //
- // There are two ways this can be done. The easiest approach is to
- // instantiate the root element's type (object in our case). This
- // will only work if all the content in the root element is optional.
- // Alternatively, we can manually look up attributes that we are
- // interested in and instantiate the corresponding type. The following
- // fragment shows how to use the second approach.
- //
- xml_schema::dom::auto_ptr<DOMDocument> doc (p.start (ifs, argv[1], true));
-
- // Find the id attribute.
- //
- DOMAttr* id_attr (
- doc->getDocumentElement ()->getAttributeNode (
- xml::string ("id").c_str ()));
-
- // Use the type and traits aliases from the object model.
- //
- object::id_type id (object::id_traits::create (*id_attr, 0, 0));
- cerr << "id: " << id << endl;
-
- // The next chunk we get is the header element.
- //
- doc = p.next ();
- header hdr (*doc->getDocumentElement ());
- cerr << "name: " << hdr.name () << endl
- << "type: " << hdr.type () << endl;
-
- // The rest is position elements.
- //
- for (doc = p.next (); doc.get () != 0; doc = p.next ())
- {
- position p (*doc->getDocumentElement ());
- cerr << "lat: " << p.lat () << " lon: " << p.lon () << endl;
- }
-
- // Serialize.
- //
-
ofstream ofs;
ofs.exceptions (ios_base::badbit | ios_base::failbit);
ofs.open ("out.xml");
- serializer s;
+ xml_schema::namespace_infomap ns_map;
+ ns_map["op"].name = "http://www.codesynthesis.com/op";
+ ns_map["op"].schema = "position.xsd";
- // With this approach we manually write the XML declaration, opening
- // and closing root element tags, as well as any attributes in the
- // root element.
- //
- ofs << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" << endl
- << "<op:object xmlns:op=\"http://www.codesynthesis.com/op\"" << endl
- << " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"" << endl
- << " xsi:schemaLocation=\"http://www.codesynthesis.com/op " <<
- "position.xsd\"" << endl
- << " id=\"" << 123 << "\">" << endl;
+ parser p;
+ serializer s;
+ p.start (ifs, argv[1], true);
s.start (ofs);
- // Serialize the header.
+ typedef xml_schema::dom::auto_ptr<DOMDocument> document_ptr;
+
+ // Peek at the root element. This way we only get the "carcase"
+ // of the document, that is, the root element with its name, all
+ // the attributes, and namespace declarations but without any of
+ // the nested elements.
//
- header h ("Lion's Head", "rock");
- s.next ("header", h);
+ document_ptr docr (p.peek ());
+ bool parsed (false);
- // Serialize position elements, one at a time.
+ // Parse first-level elements.
//
- for (unsigned short i (0); i < 8; i++)
+ for (document_ptr doc1 (p.peek ()); doc1.get () != 0; doc1 = p.peek ())
{
- float lat, lon;
- measure_position (i, lat, lon);
- position p (lat, lon);
- s.next ("position", p);
+ // Check whether it is an element that we should stream (position) or
+ // just add to the root (header).
+ //
+ string n1 (xml::transcode<char> (
+ doc1->getDocumentElement ()->getLocalName ()));
+
+ // If we see the first streaming element, then parse the root carcase.
+ //
+ if (!parsed && n1 == "position")
+ {
+ object o (*docr->getDocumentElement ());
+
+ cerr << "id: " << o.id () << endl
+ << "name: " << o.header ().name () << endl
+ << "type: " << o.header ().type () << endl;
+
+ // Start serializing the document by writing out the root carcase.
+ // Note that we leave it open so that we can serialize more elements.
+ //
+ s.next_open (ns_map["op"].name, "op:object", ns_map, o);
+ parsed = true;
+ }
+
+ // Handle elements that need streaming.
+ //
+ if (n1 == "position")
+ {
+ // Position has no nested elements that we need to stream so we
+ // finish parsing it in one go.
+ //
+ doc1 = p.next (doc1);
+ position pos (*doc1->getDocumentElement ());
+
+ cerr << "lat: " << pos.lat () << " lon: " << pos.lon () << endl;
+
+ // Serialize it (append) to the root element.
+ //
+ s.next ("position", pos);
+ }
+ else
+ {
+ // Element that doesn't require streaming (header in our case). Add
+ // to the root element and finish parsing.
+ //
+ docr = p.next (doc1, docr);
+ }
}
- // Close the root element.
+ // Close the root element in serializer.
//
- ofs << endl
- << "</op:object>" << endl;
-
+ s.next_close ("op:object");
}
catch (const xml_schema::exception& e)
{
@@ -144,32 +137,3 @@ main (int argc, char* argv[])
xercesc::XMLPlatformUtils::Terminate ();
return r;
}
-
-// Position measurement instrument interface.
-//
-struct measurements
-{
- float lat;
- float lon;
-};
-
-measurements test_measurements [8] =
-{
- {-33.8569F, 18.5083F},
- {-33.8568F, 18.5083F},
- {-33.8568F, 18.5082F},
- {-33.8570F, 18.5083F},
- {-33.8569F, 18.5084F},
- {-33.8570F, 18.5084F},
- {-33.8570F, 18.5082F},
- {-33.8569F, 18.5082F}
-};
-
-static void
-measure_position (unsigned int n, float& lat, float& lon)
-{
- // Call the instrument to measure the position.
- //
- lat = test_measurements[n].lat;
- lon = test_measurements[n].lon;
-}
diff --git a/examples/cxx/tree/streaming/grammar-input-stream.cxx b/examples/cxx/tree/streaming/grammar-input-stream.cxx
new file mode 100644
index 0000000..ffdb5b4
--- /dev/null
+++ b/examples/cxx/tree/streaming/grammar-input-stream.cxx
@@ -0,0 +1,96 @@
+// file : examples/cxx/tree/streaming/grammar-input-stream.cxx
+// author : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : not copyrighted - public domain
+
+#include <cassert>
+#include "grammar-input-stream.hxx"
+
+// Create a stream over size bytes of compressed grammar at data. Only the
+// pointer is stored (the bytes are not copied), so the buffer has to stay
+// valid for as long as the stream is read.
+//
+grammar_input_stream::
+grammar_input_stream (const XMLByte* data, std::size_t size)
+    : data_ (data),
+      size_ (size),
+      pos_ (0),
+      vpos_ (0),
+      cseq_ (0),
+      alt_ (false), // Only meaningful while cseq_ != 0; set for determinism.
+      add_zero_ (false)
+{
+}
+
+// Report the current position in the decompressed stream: the total number
+// of bytes returned by readBytes() so far (vpos_), not the offset into the
+// compressed buffer.
+//
+XMLFilePos grammar_input_stream::
+curPos () const
+{
+ return static_cast<XMLFilePos> (vpos_);
+}
+
+// Copy up to size decompressed bytes into buf and return the number of
+// bytes written.
+//
+// Compressed format, as decoded below: a non-zero byte is copied as is. A
+// zero byte is copied and is followed by a control byte whose low 7 bits
+// give the length of a sequence and whose high bit selects its kind:
+//
+//   sequential  (high bit clear): that many additional zero bytes;
+//   alternating (high bit set):   that many data bytes, each one followed
+//                                 by a zero byte.
+//
+// The decoder state (cseq_, alt_, add_zero_) is kept between calls so that
+// a sequence can span several reads.
+//
+XMLSize_t grammar_input_stream::
+readBytes (XMLByte* const buf, const XMLSize_t size)
+{
+  std::size_t i (0);
+
+  // Add a zero from the alternating sequence if it didn't fit on the
+  // previous read. If the caller asked for zero bytes, keep it pending
+  // instead of writing past the end of buf.
+  //
+  if (add_zero_ && size != 0)
+  {
+    buf[i++] = 0;
+    add_zero_ = false;
+  }
+
+  // If we have an unfinished sequential sequence, output it now.
+  //
+  if (cseq_ != 0 && !alt_)
+  {
+    for (; cseq_ != 0 && i < size; --cseq_)
+      buf[i++] = 0;
+  }
+
+  for (; i < size && pos_ < size_;)
+  {
+    XMLByte b = buf[i++] = data_[pos_++];
+
+    // See if we are in a compression sequence.
+    //
+    if (cseq_ != 0)
+    {
+      if (i < size)
+        buf[i++] = 0;
+      else
+        add_zero_ = true; // Add it on the next read.
+
+      cseq_--;
+      continue;
+    }
+
+    // If we are not in a compression sequence and this byte is
+    // not zero then we are done.
+    //
+    if (b != 0)
+      continue;
+
+    // We have a zero.
+    //
+    assert (pos_ < size_); // There has to be another byte.
+    unsigned char v (static_cast<unsigned char> (data_[pos_++]));
+    alt_ = (v & 128) != 0;
+    cseq_ = v & 127;
+
+    // If it is a sequential sequence, output as many zeros as
+    // we can.
+    //
+    if (!alt_)
+    {
+      for (; cseq_ != 0 && i < size; --cseq_)
+        buf[i++] = 0;
+    }
+  }
+
+  vpos_ += i;
+
+  return static_cast<XMLSize_t> (i);
+}
+
+// Always returns a null pointer; this stream does not report a content type.
+//
+const XMLCh* grammar_input_stream::
+getContentType () const
+{
+ return 0;
+}
diff --git a/examples/cxx/tree/streaming/grammar-input-stream.hxx b/examples/cxx/tree/streaming/grammar-input-stream.hxx
new file mode 100644
index 0000000..36ef74c
--- /dev/null
+++ b/examples/cxx/tree/streaming/grammar-input-stream.hxx
@@ -0,0 +1,41 @@
+// file : examples/cxx/tree/streaming/grammar-input-stream.hxx
+// author : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : not copyrighted - public domain
+
+#ifndef GRAMMAR_INPUT_STREAM_HXX
+#define GRAMMAR_INPUT_STREAM_HXX
+
+#include <cstddef>
+#include <xercesc/util/BinInputStream.hpp>
+
+// Memory buffer input stream with the special-purpose schema
+// grammar decompression.
+//
+class grammar_input_stream: public xercesc::BinInputStream
+{
+public :
+  // The data buffer is referenced, not copied, and is read on every
+  // readBytes() call.
+  //
+  grammar_input_stream (const XMLByte* data, std::size_t size);
+
+  virtual XMLFilePos
+  curPos () const;
+
+  virtual XMLSize_t
+  readBytes (XMLByte* const buf, const XMLSize_t size);
+
+  virtual const XMLCh*
+  getContentType () const;
+
+private :
+  const XMLByte* data_;  // Compressed input.
+  std::size_t size_;     // Size of the compressed input in bytes.
+  std::size_t pos_;      // Index of the next compressed byte to read.
+  std::size_t vpos_;     // Number of decompressed bytes returned so far.
+
+  // Compression data.
+  //
+  std::size_t cseq_; // Number of bytes left in a compression sequence.
+  bool alt_;         // Alternating or sequential sequence.
+  bool add_zero_;    // Add a zero on the next read.
+};
+
+#endif // GRAMMAR_INPUT_STREAM_HXX
diff --git a/examples/cxx/tree/streaming/makefile b/examples/cxx/tree/streaming/makefile
index cb8e8c7..f6db5c0 100644
--- a/examples/cxx/tree/streaming/makefile
+++ b/examples/cxx/tree/streaming/makefile
@@ -5,7 +5,7 @@
include $(dir $(lastword $(MAKEFILE_LIST)))../../../../build/bootstrap.make
xsd := position.xsd
-cxx := driver.cxx parser.cxx serializer.cxx
+cxx := driver.cxx parser.cxx serializer.cxx grammar-input-stream.cxx
obj := $(addprefix $(out_base)/,$(cxx:.cxx=.o) $(xsd:.xsd=.o))
dep := $(obj:.o=.o.d)
@@ -58,6 +58,8 @@ $(install):
$(call install-data,$(src_base)/parser.hxx,$(install_doc_dir)/xsd/$(path)/parser.hxx)
$(call install-data,$(src_base)/serializer.cxx,$(install_doc_dir)/xsd/$(path)/serializer.cxx)
$(call install-data,$(src_base)/serializer.hxx,$(install_doc_dir)/xsd/$(path)/serializer.hxx)
+ $(call install-data,$(src_base)/grammar-input-stream.cxx,$(install_doc_dir)/xsd/$(path)/grammar-input-stream.cxx)
+ $(call install-data,$(src_base)/grammar-input-stream.hxx,$(install_doc_dir)/xsd/$(path)/grammar-input-stream.hxx)
$(call install-data,$(src_base)/position.xsd,$(install_doc_dir)/xsd/$(path)/position.xsd)
$(call install-data,$(src_base)/position.xml,$(install_doc_dir)/xsd/$(path)/position.xml)
@@ -67,6 +69,8 @@ $(dist-common):
$(call install-data,$(src_base)/parser.hxx,$(dist_prefix)/$(path)/parser.hxx)
$(call install-data,$(src_base)/serializer.cxx,$(dist_prefix)/$(path)/serializer.cxx)
$(call install-data,$(src_base)/serializer.hxx,$(dist_prefix)/$(path)/serializer.hxx)
+ $(call install-data,$(src_base)/grammar-input-stream.cxx,$(dist_prefix)/$(path)/grammar-input-stream.cxx)
+ $(call install-data,$(src_base)/grammar-input-stream.hxx,$(dist_prefix)/$(path)/grammar-input-stream.hxx)
$(call install-data,$(src_base)/position.xsd,$(dist_prefix)/$(path)/position.xsd)
$(call install-data,$(src_base)/position.xml,$(dist_prefix)/$(path)/position.xml)
diff --git a/examples/cxx/tree/streaming/parser.cxx b/examples/cxx/tree/streaming/parser.cxx
index a1f5bb8..41ad7af 100644
--- a/examples/cxx/tree/streaming/parser.cxx
+++ b/examples/cxx/tree/streaming/parser.cxx
@@ -1,5 +1,4 @@
-// file : examples/cxx/tree/streaming/parser.cxx
-// copyright : not copyrighted - public domain
+#include <cassert>
#include <xercesc/util/XMLUni.hpp>
#include <xercesc/util/XMLString.hpp>
@@ -12,6 +11,9 @@
#include <xercesc/dom/DOM.hpp>
#include <xercesc/dom/impl/DOMTextImpl.hpp>
+#include <xercesc/validators/common/Grammar.hpp> // xercesc::Grammar
+#include <xercesc/framework/XMLGrammarPoolImpl.hpp>
+
#include <xsd/cxx/auto-array.hxx>
#include <xsd/cxx/xml/sax/std-input-source.hxx>
@@ -21,6 +23,7 @@
#include <xsd/cxx/tree/error-handler.hxx>
#include "parser.hxx"
+#include "grammar-input-stream.hxx"
using namespace std;
using namespace xercesc;
@@ -28,16 +31,22 @@ using namespace xercesc;
namespace xml = xsd::cxx::xml;
namespace tree = xsd::cxx::tree;
+typedef parser::document_ptr document_ptr;
+
class parser_impl: public DefaultHandler
{
public:
- parser_impl ();
+ parser_impl (const XMLByte* grammar, size_t grammar_size);
- xml::dom::auto_ptr<DOMDocument>
+ void
start (istream& is, const string& id, bool validate);
- xml::dom::auto_ptr<DOMDocument>
- next ();
+ document_ptr
+ peek ();
+
+ document_ptr
+ next (document_ptr doc = document_ptr (),
+ document_ptr outer_doc = document_ptr ());
// SAX event handlers.
//
@@ -61,6 +70,7 @@ private:
// SAX parser.
//
bool clean_;
+ auto_ptr<XMLGrammarPool> grammar_pool_;
auto_ptr<SAX2XMLReader> parser_;
XMLPScanToken token_;
tree::error_handler<char> error_handler_;
@@ -68,23 +78,40 @@ private:
auto_ptr<xml::sax::std_input_source> isrc_;
size_t depth_;
+ size_t whitespace_depth_; // Depth at which to ignore whitespaces.
+
+ bool peek_;
+ size_t next_depth_; // Depth at which next() should work.
// DOM document being built.
//
DOMImplementation& dom_impl_;
- xml::dom::auto_ptr<DOMDocument> doc_;
+ document_ptr doc_;
DOMElement* cur_;
};
const XMLCh ls[] = {chLatin_L, chLatin_S, chNull};
parser_impl::
-parser_impl ()
+parser_impl (const XMLByte* grammar, size_t grammar_size)
: clean_ (true),
- parser_ (XMLReaderFactory::createXMLReader ()),
error_proxy_ (error_handler_),
dom_impl_ (*DOMImplementationRegistry::getDOMImplementation (ls))
{
+ MemoryManager* mm (XMLPlatformUtils::fgMemoryManager);
+
+ if (grammar != 0)
+ {
+ assert (grammar_size != 0);
+ grammar_pool_.reset (new XMLGrammarPoolImpl (mm));
+
+ grammar_input_stream is (grammar, grammar_size);
+ grammar_pool_->deserializeGrammars(&is);
+ grammar_pool_->lockPool ();
+ }
+
+ parser_.reset (XMLReaderFactory::createXMLReader (mm, grammar_pool_.get ()));
+
parser_->setFeature (XMLUni::fgSAX2CoreNameSpaces, true);
parser_->setFeature (XMLUni::fgSAX2CoreNameSpacePrefixes, true);
parser_->setFeature (XMLUni::fgXercesValidationErrorAsFatal, true);
@@ -106,12 +133,13 @@ parser_impl ()
parser_->setContentHandler (this);
}
-xml::dom::auto_ptr<DOMDocument> parser_impl::
+void parser_impl::
start (istream& is, const string& id, bool val)
{
// Reset our state.
//
depth_ = 0;
+ peek_ = false;
doc_.reset ();
error_handler_.reset ();
@@ -125,59 +153,116 @@ start (istream& is, const string& id, bool val)
parser_->setFeature (XMLUni::fgSAX2CoreValidation, val);
parser_->setFeature (XMLUni::fgXercesSchema, val);
- // Start parsing. The first document that we return is a "carcase"
- // of the complete document. That is, the root element with all the
- // attributes but without any content.
- //
- bool r (parser_->parseFirst (*isrc_, token_));
+ if (val && grammar_pool_.get () != 0)
+ {
+ // Use the loaded grammar during parsing.
+ //
+ parser_->setFeature (XMLUni::fgXercesUseCachedGrammarInParse, true);
+
+ // Disable loading schemas via other means (e.g., schemaLocation).
+ //
+ parser_->setFeature (XMLUni::fgXercesLoadSchema, false);
+ }
+
+ parser_->parseFirst (*isrc_, token_);
error_handler_.throw_if_failed<tree::parsing<char> > ();
+}
+
+document_ptr parser_impl::
+peek ()
+{
+ bool r (true);
+
+ size_t d (depth_);
+ whitespace_depth_ = d;
- while (r && depth_ == 0)
+ peek_ = true;
+
+ // Parse (skip whitespace content) until the depth increases or we get
+ // a document. The latter test covers <element/> cases where both start
+ // and end events will trigger and therefore leave the depth unchanged.
+ //
+ while (r && depth_ == d && doc_.get () == 0)
{
r = parser_->parseNext (token_);
error_handler_.throw_if_failed<tree::parsing<char> > ();
}
if (!r)
- return xml::dom::auto_ptr<DOMDocument> (0);
+ return document_ptr (0);
return doc_;
}
-xml::dom::auto_ptr<DOMDocument> parser_impl::
-next ()
+// Parse the next element at the current level, or finish parsing the
+// element whose start was already seen by peek(), including its nested
+// content. If doc is not NULL it becomes the document being built; if
+// outer_doc is also not NULL, doc's root element is first imported into
+// outer_doc as a child of its root. Returns NULL once the parser reports
+// that there is nothing more to parse.
+//
+document_ptr parser_impl::
+next (document_ptr doc, document_ptr outer_doc)
 {
-  // We should be at depth 1. If not, then we are done parsing.
+  assert (peek_ == (doc.get () != 0));
+
+  // Install doc/outer_doc as the document we are parsing.
   //
-  if (depth_ != 1)
-    return xml::dom::auto_ptr<DOMDocument> (0);
+  if (doc.get () != 0)
+  {
+    if (outer_doc.get () != 0)
+    {
+      // Copy doc to outer_doc.
+      //
+      doc_ = outer_doc;
+      cur_ = static_cast<DOMElement*> (
+        doc_->importNode (doc->getDocumentElement (), true));
+      doc_->getDocumentElement ()->appendChild (cur_);
+    }
+    else
+    {
+      doc_ = doc;
+      cur_ = doc_->getDocumentElement ();
+    }
+
+    // This handles the <element/> case where we get both start and
+    // end events in peek(). In this case the element is fully parsed
+    // and next() has nothing to do.
+    //
+    if (depth_ != next_depth_)
+    {
+      peek_ = false;
+      return doc_;
+    }
+  }
   bool r (true);
+  // If we peeked, then we have already seen the start tag and our
+  // return depth is one above the current depth.
+  //
+  size_t d (peek_ ? depth_ - 1 : depth_);
+  whitespace_depth_ = d;
+
+  peek_ = false;
+
   // Keep calling parseNext() until we either move to a greater depth or
   // get a document. This way we skip the text (presumably whitespaces)
-  // that may be preceding the next chunk.
+  // that may be preceding this chunk.
   //
-  while (r && depth_ == 1 && doc_.get () == 0)
+  while (r && depth_ == d && doc_.get () == 0)
   {
-    parser_->parseNext (token_);
+    // Keep the result: parseNext() returns false once there is nothing
+    // more to parse. Without the assignment r stays true, so this loop
+    // cannot end in that case and the check below is dead code.
+    //
+    r = parser_->parseNext (token_);
     error_handler_.throw_if_failed<tree::parsing<char> > ();
   }
   if (!r)
-    return xml::dom::auto_ptr<DOMDocument> (0);
+    return document_ptr (0);
-  // If we are not at depth 1, keep calling parseNext() until we get
-  // there.
+  // If we are not at our start depth, keep calling parseNext() until we
+  // get there again.
   //
-  while (r && depth_ != 1)
+  while (r && depth_ != d)
   {
     r = parser_->parseNext (token_);
     error_handler_.throw_if_failed<tree::parsing<char> > ();
   }
   if (!r)
-    return xml::dom::auto_ptr<DOMDocument> (0);
+    return document_ptr (0);
   return doc_;
 }
@@ -221,6 +306,9 @@ startElement (const XMLCh* const uri,
}
depth_++;
+
+ if (peek_)
+ next_depth_ = depth_;
}
void parser_impl::
@@ -239,9 +327,10 @@ characters (const XMLCh* const s, const XMLSize_t length)
{
const XMLCh empty[] = {chNull};
- // Ignore text content (presumably whitespaces) in the root element.
+ // Ignore text content (presumably whitespaces) while looking for
+ // the next element.
//
- if (depth_ > 1)
+ if (depth_ > whitespace_depth_)
{
DOMText* t = doc_->createTextNode (empty);
static_cast<DOMTextImpl*> (t)->appendData (s, length);
@@ -259,19 +348,25 @@ parser::
}
parser::
-parser ()
- : impl_ (new parser_impl)
+parser (const XMLByte* grammar, size_t grammar_size)
+ : impl_ (new parser_impl (grammar, grammar_size))
{
}
-xml::dom::auto_ptr<DOMDocument> parser::
+// Forward to the implementation; nothing is returned now that start() is
+// void.
+//
+void parser::
 start (istream& is, const string& id, bool val)
 {
-  return impl_->start (is, id, val);
+  impl_->start (is, id, val);
 }
-xml::dom::auto_ptr<DOMDocument> parser::
-next ()
+document_ptr parser::
+peek ()
+{
+ return impl_->peek ();
+}
+
+document_ptr parser::
+next (document_ptr doc, document_ptr outer_doc)
{
- return impl_->next ();
+ return impl_->next (doc, outer_doc);
}
diff --git a/examples/cxx/tree/streaming/parser.hxx b/examples/cxx/tree/streaming/parser.hxx
index a9c53ba..cb34f92 100644
--- a/examples/cxx/tree/streaming/parser.hxx
+++ b/examples/cxx/tree/streaming/parser.hxx
@@ -1,12 +1,10 @@
-// file : examples/cxx/tree/streaming/parser.hxx
-// copyright : not copyrighted - public domain
-
#ifndef PARSER_HXX
#define PARSER_HXX
#include <string>
#include <iosfwd>
-#include <memory> // std::auto_ptr
+#include <cstddef> // std::size_t
+#include <memory> // std::auto_ptr
#include <xercesc/dom/DOMDocument.hpp>
@@ -17,20 +15,44 @@ class parser_impl;
class parser
{
public:
+ // We can specify embedded XML Schema grammar to be used by the parser
+ // that was created by the xsdbin utility from the 'embedded' example.
+ //
+ parser (const XMLByte* grammar = 0, std::size_t grammar_size = 0);
~parser ();
- parser ();
- // The start function returns a "carcase" of the complete document. That
- // is, the root element with all the attributes but without any content.
+ // The start function prepares everything for parsing a new document.
//
- xsd::cxx::xml::dom::auto_ptr<xercesc::DOMDocument>
+ void
start (std::istream& is, const std::string& id, bool validate);
- // The next function returns next first-level element with all its
- // attributes and content or 0 if no more available.
+ typedef xsd::cxx::xml::dom::auto_ptr<xercesc::DOMDocument> document_ptr;
+
+ // The peek function parses just the next element (ignoring any
+ // preceding content assuming it is whitespace) without parsing
+ // any of its nested content (but it includes the element's
+ // attributes). It returns NULL if there are no more elements
+ // at this level (there could still be some on outer levels in
+ // case of nested streaming).
+ //
+ document_ptr
+ peek ();
+
+ // The next function parses (or finishes parsing after peek) the
+ // next element including its nested content. It returns NULL if
+ // there are no more elements at this level (there could still
+ // be some on outer levels in case of nested streaming).
+ //
+ // If doc is not NULL, then it should be the document returned
+ // by peek(). That is, a document with only the root element.
+ // In this case next() finishes parsing this element.
+ //
+ // If outer_doc is not NULL, then next() will first add doc to
+ // outer_doc as a child of the document root.
//
- xsd::cxx::xml::dom::auto_ptr<xercesc::DOMDocument>
- next ();
+ document_ptr
+ next (document_ptr doc = document_ptr (),
+ document_ptr outer_doc = document_ptr ());
private:
parser (const parser&);
diff --git a/examples/cxx/tree/streaming/serializer.cxx b/examples/cxx/tree/streaming/serializer.cxx
index 04d2db4..0ce8156 100644
--- a/examples/cxx/tree/streaming/serializer.cxx
+++ b/examples/cxx/tree/streaming/serializer.cxx
@@ -1,9 +1,11 @@
-// file : examples/cxx/tree/streaming/serializer.cxx
-// copyright : not copyrighted - public domain
+#include <vector>
+#include <cassert>
+#include <cstddef>
#include <xercesc/util/XMLUni.hpp>
#include <xercesc/dom/DOM.hpp>
+#include <xercesc/dom/impl/DOMLSSerializerImpl.hpp>
#include <xsd/cxx/xml/string.hxx>
#include <xsd/cxx/xml/dom/bits/error-handler-proxy.hxx>
@@ -20,6 +22,319 @@ using namespace xercesc;
namespace xml = xsd::cxx::xml;
namespace tree = xsd::cxx::tree;
+static const XMLCh gEOLSeq[] =
+{
+ chLF, chNull
+};
+
+static const XMLCh gUTF8[] =
+{
+ chLatin_U, chLatin_T, chLatin_F, chDash, chDigit_8, chNull
+};
+
+static const XMLCh gEndElement[] =
+{
+ chOpenAngle, chForwardSlash, chNull
+};
+
+static const int DISCARD_DEFAULT_CONTENT_ID = 0x1;
+static const int ENTITIES_ID = 0x2;
+static const int FORMAT_PRETTY_PRINT_1ST_LEVEL_ID = 0xA;
+
+class StreamingDOMSerializer: public DOMLSSerializerImpl
+{
+public:
+ StreamingDOMSerializer (MemoryManager* manager)
+ : DOMLSSerializerImpl (manager)
+ {
+ }
+
+ bool
+ startOpen (const DOMElement* e, DOMLSOutput* const destination)
+ {
+ const DOMDocument* docu (e->getOwnerDocument ());
+ assert (docu != 0);
+
+ // Code adapted from DOMLSSerializerImpl::write().
+ //
+ target_ = destination->getByteStream();
+
+ fEncodingUsed = gUTF8;
+
+ const XMLCh* lsEncoding=destination->getEncoding();
+ if (lsEncoding && *lsEncoding)
+ {
+ fEncodingUsed = lsEncoding;
+ }
+ else if (docu)
+ {
+ const XMLCh* tmpEncoding = docu->getInputEncoding();
+
+ if ( tmpEncoding && *tmpEncoding)
+ {
+ fEncodingUsed = tmpEncoding;
+ }
+ else
+ {
+ tmpEncoding = docu->getXmlEncoding();
+
+ if ( tmpEncoding && *tmpEncoding)
+ {
+ fEncodingUsed = tmpEncoding;
+ }
+ }
+ }
+
+ fNewLineUsed = (fNewLine && *fNewLine)? fNewLine : gEOLSeq;
+
+ fDocumentVersion = (docu->getXmlVersion() && *(docu->getXmlVersion()))
+ ? docu->getXmlVersion()
+ : XMLUni::fgVersion1_0;
+
+ fErrorCount = 0;
+
+ fLineFeedInTextNodePrinted = false;
+ fLastWhiteSpaceInTextNode = 0;
+
+ level_ = 0;
+ namespace_map_.clear ();
+
+ fFormatter = new (fMemoryManager) XMLFormatter( fEncodingUsed
+ ,fDocumentVersion
+ ,target_
+ ,XMLFormatter::NoEscapes
+ ,XMLFormatter::UnRep_CharRef
+ ,fMemoryManager);
+ formatter_.reset (fFormatter);
+
+ // Write out the XML declaration, etc. Here we assume that the document
+ // has no children (i.e., no root element).
+ //
+ processNode (docu, 0);
+ fLineFeedInTextNodePrinted = true;
+
+ return writeOpen (e);
+ }
+
+ bool
+ writeOpen (const DOMElement* e)
+ {
+ // Code adapted from the first part of ELEMENT_NODE case in
+ // DOMLSSerializerImpl::processNode().
+ //
+
+ if (!fLineFeedInTextNodePrinted)
+ {
+ if(level_ == 1 && getFeature(FORMAT_PRETTY_PRINT_1ST_LEVEL_ID))
+ printNewLine();
+
+ printNewLine();
+ }
+ else
+ {
+ fLineFeedInTextNodePrinted = false;
+ }
+
+ printIndent(level_);
+
+ RefHashTableOf<XMLCh>* namespaceMap = NULL;
+
+ *fFormatter << XMLFormatter::NoEscapes << chOpenAngle <<
+ e->getNodeName ();
+
+ setURCharRef();
+ DOMNamedNodeMap *attributes = e->getAttributes();
+ XMLSize_t attrCount = attributes->getLength();
+
+ const XMLCh* prefix = e->getPrefix();
+ const XMLCh* uri = e->getNamespaceURI();
+ if((uri && uri[0]) ||
+ ((prefix==0 || prefix[0]==0) && isDefaultNamespacePrefixDeclared()))
+ {
+ if(prefix==0 || prefix[0]==0)
+ prefix=XMLUni::fgZeroLenString;
+ if(!isNamespaceBindingActive(prefix, uri))
+ {
+ if(namespaceMap==NULL)
+ {
+ namespaceMap=new (fMemoryManager) RefHashTableOf<XMLCh>(12, false, fMemoryManager);
+ fNamespaceStack->addElement(namespaceMap);
+ }
+ namespaceMap->put((void*)prefix,(XMLCh*)uri);
+ *fFormatter << XMLFormatter::NoEscapes
+ << chSpace << XMLUni::fgXMLNSString;
+
+ if(!XMLString::equals(prefix,XMLUni::fgZeroLenString))
+ *fFormatter << chColon << prefix;
+
+ *fFormatter << chEqual << chDoubleQuote
+ << XMLFormatter::AttrEscapes
+ << uri
+ << XMLFormatter::NoEscapes
+ << chDoubleQuote;
+ }
+ }
+
+ bool discard = getFeature(DISCARD_DEFAULT_CONTENT_ID);
+ for (XMLSize_t i = 0; i < attrCount; i++)
+ {
+ DOMAttr* attribute = (DOMAttr*)attributes->item(i);
+
+ if (discard && !((DOMAttr*)attribute )->getSpecified())
+ continue;
+
+ // if this attribute is a namespace declaration, add it to the namespace map for the current level
+ const XMLCh* ns = attribute->getNamespaceURI();
+ if (ns != 0 )
+ {
+ if(XMLString::equals(ns, XMLUni::fgXMLNSURIName))
+ {
+ if(namespaceMap==NULL)
+ {
+ namespaceMap=new (fMemoryManager) RefHashTableOf<XMLCh>(12, false, fMemoryManager);
+ fNamespaceStack->addElement(namespaceMap);
+ }
+ const XMLCh* nsPrefix = attribute->getLocalName();
+ if(XMLString::equals(attribute->getNodeName(),XMLUni::fgXMLNSString))
+ nsPrefix = XMLUni::fgZeroLenString;
+ if(namespaceMap->containsKey((void*)nsPrefix))
+ continue;
+ namespaceMap->put((void*)attribute->getLocalName(),(XMLCh*)attribute->getNodeValue());
+ }
+ else if(!XMLString::equals(ns, XMLUni::fgXMLURIName))
+ {
+ // check if the namespace for the current node is already defined
+ const XMLCh* prefix = attribute->getPrefix();
+ if(prefix && prefix[0])
+ {
+ const XMLCh* uri = attribute->getNamespaceURI();
+ if(!isNamespaceBindingActive(prefix, uri))
+ {
+ if(namespaceMap==NULL)
+ {
+ namespaceMap=new (fMemoryManager) RefHashTableOf<XMLCh>(12, false, fMemoryManager);
+ fNamespaceStack->addElement(namespaceMap);
+ }
+ namespaceMap->put((void*)prefix,(XMLCh*)uri);
+ *fFormatter << XMLFormatter::NoEscapes
+ << chSpace << XMLUni::fgXMLNSString << chColon << prefix
+ << chEqual << chDoubleQuote
+ << XMLFormatter::AttrEscapes
+ << uri
+ << XMLFormatter::NoEscapes
+ << chDoubleQuote;
+ }
+ }
+ }
+ }
+
+ if (XMLString::equals(ns, XMLUni::fgXMLNSURIName) || checkFilter(attribute) == DOMNodeFilter::FILTER_ACCEPT)
+ {
+ *fFormatter << XMLFormatter::NoEscapes
+ << chSpace << attribute->getNodeName()
+ << chEqual << chDoubleQuote
+ << XMLFormatter::AttrEscapes;
+
+ if (getFeature(ENTITIES_ID))
+ {
+ DOMNode* child = attribute->getFirstChild();
+ while( child != 0)
+ {
+ if(child->getNodeType()==DOMNode::TEXT_NODE)
+ *fFormatter << child->getNodeValue();
+ else if(child->getNodeType()==DOMNode::ENTITY_REFERENCE_NODE)
+ *fFormatter << XMLFormatter::NoEscapes
+ << chAmpersand << child->getNodeName() << chSemiColon
+ << XMLFormatter::AttrEscapes;
+ child = child->getNextSibling();
+ }
+ }
+ else
+ *fFormatter << attribute->getNodeValue();
+
+ *fFormatter << XMLFormatter::NoEscapes << chDoubleQuote;
+ }
+ }
+
+ *fFormatter << XMLFormatter::NoEscapes << chCloseAngle;
+
+ // Keep track of whether we have added a namespace map for this
+ // element. Used to pop it in writeClose().
+ //
+ namespace_map_.push_back (namespaceMap != 0);
+
+ level_++;
+
+ DOMNode* child = e->getFirstChild();
+ while (child != 0)
+ {
+ processNode (child, level_);
+ child = child->getNextSibling();
+ }
+
+ return fErrorCount == 0;
+ }
+
+ bool
+ writeClose (const XMLCh* name)
+ {
+ // Code adapted from the second part of ELEMENT_NODE case in
+ // DOMLSSerializerImpl::processNode().
+ //
+ level_--;
+
+ // Assume we are not on the same line (nodeLine != fCurrentLine).
+ //
+ {
+ if (!fLineFeedInTextNodePrinted)
+ {
+ printNewLine();
+ }
+ else
+ {
+ fLineFeedInTextNodePrinted = false;
+ }
+
+ if(level_ == 0 && getFeature(FORMAT_PRETTY_PRINT_1ST_LEVEL_ID))
+ printNewLine();
+
+ printIndent(level_);
+ }
+
+ *fFormatter << XMLFormatter::NoEscapes << gEndElement <<
+ name << chCloseAngle;
+
+ if (namespace_map_.back ())
+ fNamespaceStack->removeLastElement();
+
+ namespace_map_.pop_back ();
+
+ if (level_ == 0)
+ {
+ printNewLine();
+ target_->flush ();
+ }
+
+ return fErrorCount == 0;
+ }
+
+ bool
+ write (const DOMElement* e)
+ {
+ processNode (e, level_);
+ return fErrorCount == 0;
+ }
+
+ using DOMLSSerializerImpl::write; // Whole document.
+
+private:
+ XMLFormatTarget* target_;
+ std::auto_ptr<XMLFormatter> formatter_;
+ int level_;
+
+ std::vector<bool> namespace_map_;
+};
+
class serializer_impl
{
public:
@@ -37,17 +352,25 @@ public:
create (const string& ns, const string& qname, const namespace_infomap&);
void
- serialize (DOMElement& e);
+ serialize (xml::dom::auto_ptr<DOMElement>);
+
+ void
+ serialize_open (xml::dom::auto_ptr<DOMElement>);
+
+ void
+ serialize_close (const string&);
private:
void
- add_namespaces (xercesc::DOMElement*, const namespace_infomap&);
+ clear_document ();
private:
+ bool start_;
+
// Serializer.
//
xml::dom::auto_ptr<DOMLSOutput> out_;
- xml::dom::auto_ptr<DOMLSSerializer> serializer_;
+ xml::dom::auto_ptr<StreamingDOMSerializer> serializer_;
auto_ptr<xml::dom::ostream_format_target> oft_;
@@ -58,6 +381,10 @@ private:
//
DOMImplementation& dom_impl_;
xml::dom::auto_ptr<DOMDocument> doc_;
+ vector<DOMElement*> element_stack_;
+
+ size_t element_count_; // Number of elements serialized using current doc.
+ static const size_t element_count_limit_ = 500;
};
const XMLCh ls[] = {chLatin_L, chLatin_S, chNull};
@@ -65,27 +392,35 @@ const XMLCh ls[] = {chLatin_L, chLatin_S, chNull};
serializer_impl::
serializer_impl ()
: error_proxy_ (error_handler_),
- dom_impl_ (*DOMImplementationRegistry::getDOMImplementation (ls)),
- doc_ (dom_impl_.createDocument ())
+ dom_impl_ (*DOMImplementationRegistry::getDOMImplementation (ls))
{
- serializer_.reset (dom_impl_.createLSSerializer ());
- DOMConfiguration* conf (serializer_->getDomConfig ());
+ serializer_.reset (
+ new (XMLPlatformUtils::fgMemoryManager)
+ StreamingDOMSerializer (XMLPlatformUtils::fgMemoryManager));
+ DOMConfiguration* conf (serializer_->getDomConfig ());
conf->setParameter (XMLUni::fgDOMErrorHandler, &error_proxy_);
+ conf->setParameter (XMLUni::fgDOMXMLDeclaration, true);
conf->setParameter (XMLUni::fgDOMWRTDiscardDefaultContent, true);
conf->setParameter (XMLUni::fgDOMWRTFormatPrettyPrint, true);
- conf->setParameter (XMLUni::fgDOMXMLDeclaration, false);
+ conf->setParameter (XMLUni::fgDOMWRTXercesPrettyPrint, false);
}
void serializer_impl::
start (ostream& os, const string& encoding)
{
+ element_stack_.clear ();
+ doc_.reset (dom_impl_.createDocument ());
+ element_count_ = 0;
+
error_handler_.reset ();
oft_.reset (new xml::dom::ostream_format_target (os));
out_.reset (dom_impl_.createLSOutput ());
out_->setEncoding (xml::string (encoding).c_str ());
out_->setByteStream (oft_.get ());
+
+ start_ = true;
}
DOMElement* serializer_impl::
@@ -94,7 +429,13 @@ create (const string& name, const namespace_infomap& map)
DOMElement* r (doc_->createElement (xml::string (name).c_str ()));
if (!map.empty ())
- add_namespaces (r, map);
+ xml::dom::add_namespaces<char> (*r, map);
+
+ // Add the element as the child of the stack "tip" so that it
+ // "sees" all the namespace declarations active from this point.
+ //
+ if (!element_stack_.empty ())
+ element_stack_.back ()->appendChild (r);
return r;
}
@@ -107,42 +448,111 @@ create (const string& ns, const string& qname, const namespace_infomap& map)
xml::string (ns).c_str (), xml::string (qname).c_str ()));
if (!map.empty ())
- add_namespaces (r, map);
+ xml::dom::add_namespaces<char> (*r, map);
+
+ // Add the element as the child of the stack "tip" so that it
+ // "sees" all the namespace declarations active from this point.
+ //
+ if (!element_stack_.empty ())
+ element_stack_.back ()->appendChild (r);
return r;
}
void serializer_impl::
-add_namespaces (DOMElement* e, const namespace_infomap& map)
+serialize (xml::dom::auto_ptr<DOMElement> p)
{
- for (namespace_infomap::const_iterator i (map.begin ()), end (map.end ());
- i != end; ++i)
+ DOMElement* e (p.get ());
+
+ if (start_)
{
- if (i->first.empty ())
- {
- // Empty prefix.
- //
- if (!i->second.name.empty ())
- e->setAttributeNS (
- XMLUni::fgXMLNSURIName,
- xml::string ("xmlns").c_str (),
- xml::string (i->second.name).c_str ());
- }
- else
- {
- e->setAttributeNS (
- XMLUni::fgXMLNSURIName,
- xml::string ("xmlns:" + i->first).c_str (),
- xml::string (i->second.name).c_str ());
- }
+ serializer_->write (e, out_.get ());
+ start_ = false;
}
+ else
+ serializer_->write (e);
+
+ error_handler_.throw_if_failed<tree::serialization<char> > ();
+
+ // Remove this element from its parent before we release it.
+ //
+ if (!element_stack_.empty ())
+ element_stack_.back ()->removeChild (e);
+
+ p.reset (); // Release it before we may clear the document below.
+
+ if (element_count_++ > element_count_limit_)
+ clear_document ();
}
void serializer_impl::
-serialize (DOMElement& e)
+serialize_open (xml::dom::auto_ptr<DOMElement> p)
{
- serializer_->write (&e, out_.get ());
+ DOMElement* e (p.get ());
+
+ if (start_)
+ {
+ serializer_->startOpen (e, out_.get ());
+ start_ = false;
+ }
+ else
+ serializer_->writeOpen (e);
+
+ error_handler_.throw_if_failed<tree::serialization<char> > ();
+
+ // Add this element to the element stack. serialize_close() is
+ // responsible for its removal and releasing.
+ //
+ element_stack_.push_back (e);
+ p.release ();
+}
+
+void serializer_impl::
+serialize_close (const string& name)
+{
+ serializer_->writeClose (xml::string (name).c_str ());
error_handler_.throw_if_failed<tree::serialization<char> > ();
+
+ // Release the element.
+ //
+ DOMElement* e (element_stack_.back ());
+ element_stack_.pop_back ();
+
+ if (!element_stack_.empty ())
+ element_stack_.back ()->removeChild (e);
+
+ e->release ();
+
+ if (element_count_++ > element_count_limit_)
+ clear_document ();
+}
+
+void serializer_impl::
+clear_document ()
+{
+ // Re-create the document in order to force deallocation of its
+ // internal heap. While Xerces-C++ DOM tries to re-use memory,
+ // it still accumulates no-longer-used memory blocks.
+ //
+ xml::dom::auto_ptr<DOMDocument> doc (dom_impl_.createDocument ());
+
+ if (!element_stack_.empty ())
+ {
+ DOMElement* e (
+ static_cast<DOMElement*> (
+ doc->importNode (element_stack_.front (), true)));
+
+ for (vector<DOMElement*>::iterator i (element_stack_.begin ());
+ i != element_stack_.end ();
+ ++i)
+ {
+ *i = e;
+ e = static_cast<DOMElement*> (e->getFirstChild ());
+ }
+ }
+
+ doc_ = doc;
+ element_count_ = 0;
}
//
@@ -179,7 +589,19 @@ create (const string& ns, const string& qname, const namespace_infomap& map)
}
void serializer::
-serialize (DOMElement& e)
+serialize (xml::dom::auto_ptr<DOMElement> e)
{
impl_->serialize (e);
}
+
+void serializer::
+serialize_open (xml::dom::auto_ptr<DOMElement> e)
+{
+ impl_->serialize_open (e);
+}
+
+void serializer::
+serialize_close (const string& name)
+{
+ impl_->serialize_close (name);
+}
diff --git a/examples/cxx/tree/streaming/serializer.hxx b/examples/cxx/tree/streaming/serializer.hxx
index 4f2bf65..43fab69 100644
--- a/examples/cxx/tree/streaming/serializer.hxx
+++ b/examples/cxx/tree/streaming/serializer.hxx
@@ -1,4 +1,5 @@
// file : examples/cxx/tree/streaming/serializer.hxx
+// author : Boris Kolpackov <boris@codesynthesis.com>
// copyright : not copyrighted - public domain
#ifndef SERIALIZER_HXX
@@ -59,6 +60,32 @@ public:
const namespace_infomap&,
const T& x);
+ // The next_open/close functions are like next() but split into two steps.
+ // next_open() serializes the object model fragment into an element leaving
+ // it open while next_close() closes the element.
+ //
+ template <typename T>
+ void
+ next_open (const std::string& name, const T& x);
+
+ template <typename T>
+ void
+ next_open (const std::string& name, const namespace_infomap&, const T& x);
+
+ template <typename T>
+ void
+ next_open (const std::string& ns, const std::string& name, const T& x);
+
+ template <typename T>
+ void
+ next_open (const std::string& ns,
+ const std::string& name,
+ const namespace_infomap&,
+ const T& x);
+
+ void
+ next_close (const std::string& name);
+
private:
serializer (const serializer&);
@@ -75,7 +102,13 @@ private:
const namespace_infomap&);
void
- serialize (xercesc::DOMElement&);
+ serialize (xsd::cxx::xml::dom::auto_ptr<xercesc::DOMElement>);
+
+ void
+ serialize_open (xsd::cxx::xml::dom::auto_ptr<xercesc::DOMElement>);
+
+ void
+ serialize_close (const std::string& name);
private:
std::auto_ptr<serializer_impl> impl_;
@@ -88,7 +121,7 @@ next (const std::string& name, const T& x)
xsd::cxx::xml::dom::auto_ptr<xercesc::DOMElement> e (
create (name, namespace_infomap ()));
*e << x;
- serialize (*e);
+ serialize (e);
}
template <typename T>
@@ -97,7 +130,7 @@ next (const std::string& name, const namespace_infomap& map, const T& x)
{
xsd::cxx::xml::dom::auto_ptr<xercesc::DOMElement> e (create (name, map));
*e << x;
- serialize (*e);
+ serialize (e);
}
template <typename T>
@@ -106,9 +139,8 @@ next (const std::string& ns, const std::string& name, const T& x)
{
xsd::cxx::xml::dom::auto_ptr<xercesc::DOMElement> e (
create (ns, name, namespace_infomap ()));
-
*e << x;
- serialize (*e);
+ serialize (e);
}
template <typename T>
@@ -120,7 +152,54 @@ next (const std::string& ns,
{
xsd::cxx::xml::dom::auto_ptr<xercesc::DOMElement> e (create (ns, name, map));
*e << x;
- serialize (*e);
+ serialize (e);
+}
+
+template <typename T>
+inline void serializer::
+next_open (const std::string& name, const T& x)
+{
+ xsd::cxx::xml::dom::auto_ptr<xercesc::DOMElement> e (
+ create (name, namespace_infomap ()));
+ *e << x;
+ serialize_open (e);
+}
+
+template <typename T>
+inline void serializer::
+next_open (const std::string& name, const namespace_infomap& map, const T& x)
+{
+ xsd::cxx::xml::dom::auto_ptr<xercesc::DOMElement> e (create (name, map));
+ *e << x;
+ serialize_open (e);
+}
+
+template <typename T>
+inline void serializer::
+next_open (const std::string& ns, const std::string& name, const T& x)
+{
+ xsd::cxx::xml::dom::auto_ptr<xercesc::DOMElement> e (
+ create (ns, name, namespace_infomap ()));
+ *e << x;
+ serialize_open (e);
+}
+
+template <typename T>
+inline void serializer::
+next_open (const std::string& ns,
+ const std::string& name,
+ const namespace_infomap& map,
+ const T& x)
+{
+ xsd::cxx::xml::dom::auto_ptr<xercesc::DOMElement> e (create (ns, name, map));
+ *e << x;
+ serialize_open (e);
+}
+
+inline void serializer::
+next_close (const std::string& name)
+{
+ serialize_close (name);
}
#endif // SERIALIZER_HXX