summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2015-09-10 12:18:02 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2015-09-10 12:18:02 +0200
commit8ffa44b96b668b0286aa73cc331b59d4cef38d48 (patch)
treef877fdad915719673606aff7b8b7e693c7baf376
parentaabb88ef1fc3ada75b7210b73c05e73af5c653a8 (diff)
Add example of more secure XML parsing
-rw-r--r--dist/examples/cxx/tree/makefile2
-rw-r--r--dist/examples/cxx/tree/secure/makefile28
-rw-r--r--examples/cxx/tree/README4
-rw-r--r--examples/cxx/tree/makefile4
-rw-r--r--examples/cxx/tree/secure/README41
-rw-r--r--examples/cxx/tree/secure/driver.cxx141
-rw-r--r--examples/cxx/tree/secure/library.xml59
-rw-r--r--examples/cxx/tree/secure/library.xsd72
-rw-r--r--examples/cxx/tree/secure/makefile104
-rw-r--r--examples/cxx/tree/secure/secure-dom-parser.cxx24
-rw-r--r--examples/cxx/tree/secure/secure-dom-parser.hxx25
11 files changed, 501 insertions, 3 deletions
diff --git a/dist/examples/cxx/tree/makefile b/dist/examples/cxx/tree/makefile
index 44e7a0f..32b44de 100644
--- a/dist/examples/cxx/tree/makefile
+++ b/dist/examples/cxx/tree/makefile
@@ -1,5 +1,5 @@
dirs := binary caching embedded custom hello library messaging mixed \
-multiroot order performance polymorphism streaming wildcard
+multiroot order performance polymorphism secure streaming wildcard
ifeq ($(WITH_ZLIB),1)
dirs += compression
diff --git a/dist/examples/cxx/tree/secure/makefile b/dist/examples/cxx/tree/secure/makefile
new file mode 100644
index 0000000..4898a10
--- /dev/null
+++ b/dist/examples/cxx/tree/secure/makefile
@@ -0,0 +1,28 @@
+root := ../../..
+
+include $(root)/build/cxx/rules.make
+include $(root)/build/xsd/tree-rules.make
+
+# Build.
+#
+driver: driver.o secure-dom-parser.o library.o
+
+library.o: library.cxx library.hxx
+secure-dom-parser.o: secure-dom-parser.cxx secure-dom-parser.hxx
+driver.o: driver.cxx secure-dom-parser.hxx library.hxx
+
+library.cxx library.hxx: library.xsd
+
+
+# Test: run the driver on the sample document.
+#
+.PHONY: test
+test: driver library.xml
+ ./driver library.xml
+
+
+# Clean.
+#
+.PHONY: clean
+clean:
+ rm -f library.o library.?xx secure-dom-parser.o driver.o driver
diff --git a/examples/cxx/tree/README b/examples/cxx/tree/README
index 569bf11..83ffcab 100644
--- a/examples/cxx/tree/README
+++ b/examples/cxx/tree/README
@@ -52,6 +52,10 @@ embedded
into an application and then use it with the C++/Tree mapping to
parse and validate XML documents.
+secure
+ Shows how to perform more secure XML parsing by disabling XML
+ External Entity (XXE) processing.
+
performance
Measures the performance of parsing and serialization. This example
also shows how to structure your code to achieve the maximum
diff --git a/examples/cxx/tree/makefile b/examples/cxx/tree/makefile
index 172195a..353ad6d 100644
--- a/examples/cxx/tree/makefile
+++ b/examples/cxx/tree/makefile
@@ -5,11 +5,11 @@
include $(dir $(lastword $(MAKEFILE_LIST)))../../../build/bootstrap.make
all_examples := binary caching embedded custom hello library messaging \
-mixed multiroot order performance polymorphism streaming wildcard \
+mixed multiroot order performance polymorphism secure streaming wildcard \
compression xpath
build_examples := binary caching embedded custom hello library messaging \
-mixed multiroot order performance polymorphism streaming wildcard
+mixed multiroot order performance polymorphism secure streaming wildcard
ifeq ($(xsd_with_zlib),y)
build_examples += compression
diff --git a/examples/cxx/tree/secure/README b/examples/cxx/tree/secure/README
new file mode 100644
index 0000000..649f0a3
--- /dev/null
+++ b/examples/cxx/tree/secure/README
@@ -0,0 +1,41 @@
+This example shows how to perform more secure XML parsing by disabling
+XML External Entity (XXE) processing. If XML Schema validation is
+used, then it would also make sense to pre-load the known schemas and
+to disable loading of any external schemas, for example, via the
+schemaLocation attribute found in the XML documents. See the comment
+in driver.cxx for more information on how to achieve this.
+
+The example consists of the following files:
+
+library.xsd
+ XML Schema which describes a library of books.
+
+library.xml
+ Sample XML instance document. It includes (commented out) DOCTYPE
+ declarations with internal and external subsets that the parser
+ will refuse to process.
+
+library.hxx
+library.cxx
+ C++ types that represent the given vocabulary and a set of parsing
+ functions that convert XML instance documents to a tree-like in-memory
+ object model. These are generated by the XSD compiler from library.xsd.
+
+secure-dom-parser.hxx
+secure-dom-parser.cxx
+ A secure Xerces-C++ DOM parser implementation that disables processing
+ of internal/external DTD subsets.
+
+driver.cxx
+ Driver for the example. It first sets up the secure DOM parser. It then
+ parses the input file to a DOM document using the secure DOM parser and
+ calls one of the parsing functions that constructs the object model from
+ this DOM document. Finally, the driver prints the number of books in the
+ object model to STDERR.
+
+To run the example on the sample XML instance document simply execute:
+
+$ ./driver library.xml
+
+To verify that DTD processing is disabled, replace the active DOCTYPE
+declaration in the sample document with one of the commented-out versions.
diff --git a/examples/cxx/tree/secure/driver.cxx b/examples/cxx/tree/secure/driver.cxx
new file mode 100644
index 0000000..2dfb3bf
--- /dev/null
+++ b/examples/cxx/tree/secure/driver.cxx
@@ -0,0 +1,141 @@
+// file : examples/cxx/tree/secure/driver.cxx
+// copyright : not copyrighted - public domain
+
+#include <memory> // std::auto_ptr
+#include <fstream>
+#include <iostream>
+
+#include <xercesc/dom/DOM.hpp>
+#include <xercesc/util/PlatformUtils.hpp>
+#include <xercesc/framework/Wrapper4InputSource.hpp>
+
+#include <xsd/cxx/xml/string.hxx>
+#include <xsd/cxx/xml/dom/auto-ptr.hxx>
+#include <xsd/cxx/xml/dom/bits/error-handler-proxy.hxx>
+#include <xsd/cxx/xml/sax/std-input-source.hxx>
+
+#include <xsd/cxx/tree/error-handler.hxx>
+
+#include "library.hxx"
+#include "secure-dom-parser.hxx"
+
+using namespace std;
+
+int
+main (int argc, char* argv[])
+{
+ if (argc != 2)
+ {
+ cerr << "usage: " << argv[0] << " library.xml" << endl;
+ return 1;
+ }
+
+ int r (0); // Exit code: set to 1 if an exception is caught below.
+
+ // We need to initialize the Xerces-C++ runtime because we
+ // are doing the XML-to-DOM parsing ourselves.
+ //
+ xercesc::XMLPlatformUtils::Initialize ();
+
+ try
+ {
+ using namespace xercesc;
+ namespace xml = xsd::cxx::xml;
+ namespace tree = xsd::cxx::tree;
+
+ xml::dom::auto_ptr<DOMLSParser> parser (new SecureDOMParser ());
+
+ DOMConfiguration* conf (parser->getDomConfig ());
+
+ // Discard comment nodes in the document.
+ //
+ conf->setParameter (XMLUni::fgDOMComments, false);
+
+ // Enable datatype normalization.
+ //
+ conf->setParameter (XMLUni::fgDOMDatatypeNormalization, true);
+
+ // Do not create EntityReference nodes in the DOM tree. No
+ // EntityReference nodes will be created, only the nodes
+ // corresponding to their fully expanded substitution text
+ // will be created.
+ //
+ conf->setParameter (XMLUni::fgDOMEntities, false);
+
+ // Perform namespace processing.
+ //
+ conf->setParameter (XMLUni::fgDOMNamespaces, true);
+
+ // Do not include ignorable whitespace in the DOM tree.
+ //
+ conf->setParameter (XMLUni::fgDOMElementContentWhitespace, false);
+
+ // Enable validation.
+ //
+ conf->setParameter (XMLUni::fgDOMValidate, true);
+ conf->setParameter (XMLUni::fgXercesSchema, true);
+ conf->setParameter (XMLUni::fgXercesSchemaFullChecking, false);
+
+ // Xerces-C++ 3.1.0 is the first version with working multi import
+ // support.
+ //
+#if _XERCES_VERSION >= 30100
+ conf->setParameter (XMLUni::fgXercesHandleMultipleImports, true);
+#endif
+
+ // Disable loading schemas via other means (e.g., schemaLocation).
+ //
+ // Note: the call below is left commented out. Enabling it is a good
+ // idea; however, if you need validation, you will then have to
+ // pre-load the schemas yourself. See the 'caching' and 'embedded'
+ // examples for different approaches. Both work with SecureDOMParser.
+ //
+ // conf->setParameter (XMLUni::fgXercesLoadSchema, false);
+
+ // We will release the DOM document ourselves.
+ //
+ conf->setParameter (XMLUni::fgXercesUserAdoptsDOMDocument, true);
+
+ // Set error handler.
+ //
+ tree::error_handler<char> eh;
+ xml::dom::bits::error_handler_proxy<char> ehp (eh);
+ conf->setParameter (XMLUni::fgDOMErrorHandler, &ehp);
+
+ // Open the XML file; I/O failures throw std::ios_base::failure.
+ //
+ ifstream ifs;
+ ifs.exceptions (ifstream::badbit | ifstream::failbit);
+ ifs.open (argv[1]);
+
+ // Wrap the std::ifstream so that it can be passed to the parser.
+ //
+ xml::sax::std_input_source isrc (ifs, argv[1]);
+ Wrapper4InputSource wrap (&isrc, false);
+
+ // Parse XML to DOM.
+ //
+ xml_schema::dom::auto_ptr<DOMDocument> doc (parser->parse (&wrap));
+
+ eh.throw_if_failed<xml_schema::parsing> ();
+
+ // Parse DOM to the object model.
+ //
+ auto_ptr<library::catalog> c (library::catalog_ (*doc));
+
+ cerr << "catalog with " << c->book ().size () << " books" << endl;
+ }
+ catch (const xml_schema::exception& e)
+ {
+ cerr << e << endl;
+ r = 1;
+ }
+ catch (const std::ios_base::failure&)
+ {
+ cerr << argv[1] << ": unable to open or read failure" << endl;
+ r = 1;
+ }
+
+ xercesc::XMLPlatformUtils::Terminate ();
+ return r;
+}
diff --git a/examples/cxx/tree/secure/library.xml b/examples/cxx/tree/secure/library.xml
new file mode 100644
index 0000000..33a2041
--- /dev/null
+++ b/examples/cxx/tree/secure/library.xml
@@ -0,0 +1,59 @@
+<?xml version="1.0"?>
+<!DOCTYPE lib:catalog>
+<!--
+<!DOCTYPE lib:catalog [<!ENTITY xxe SYSTEM "file:///dev/random" >]>
+-->
+<!--
+<!DOCTYPE lib:catalog PUBLIC "public id" "http://example.org">
+-->
+
+<!--
+
+file : examples/cxx/tree/secure/library.xml
+copyright : not copyrighted - public domain
+
+-->
+
+<lib:catalog xmlns:lib="http://www.codesynthesis.com/library"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.codesynthesis.com/library library.xsd">
+
+ <book id="MM" available="false">
+ <isbn>0679760806</isbn>
+ <title>The Master and Margarita</title>
+ <genre>fiction</genre>
+
+ <author recommends="WP">
+ <name>Mikhail Bulgakov</name>
+ <born>1891-05-15</born>
+ <died>1940-03-10</died>
+ </author>
+ </book>
+
+
+ <book id="WP">
+ <isbn>0679600841</isbn>
+ <title>War and Peace</title>
+ <genre>history</genre>
+
+ <author recommends="CP">
+ <name>Leo Tolstoy</name>
+ <born>1828-09-09</born>
+ <died>1910-11-20</died>
+ </author>
+ </book>
+
+
+ <book id="CP" available="false">
+ <isbn>0679420290</isbn>
+ <title>Crime and Punishment</title>
+ <genre>philosophy</genre>
+
+ <author>
+ <name>Fyodor Dostoevsky</name>
+ <born>1821-11-11</born>
+ <died>1881-02-09</died>
+ </author>
+ </book>
+
+</lib:catalog>
diff --git a/examples/cxx/tree/secure/library.xsd b/examples/cxx/tree/secure/library.xsd
new file mode 100644
index 0000000..afad24b
--- /dev/null
+++ b/examples/cxx/tree/secure/library.xsd
@@ -0,0 +1,72 @@
+<?xml version="1.0"?>
+
+<!--
+
+file : examples/cxx/tree/secure/library.xsd
+copyright : not copyrighted - public domain
+
+-->
+
+<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
+ xmlns:xse="http://www.codesynthesis.com/xmlns/xml-schema-extension"
+ xmlns:lib="http://www.codesynthesis.com/library"
+ targetNamespace="http://www.codesynthesis.com/library">
+
+ <xsd:simpleType name="isbn">
+ <xsd:restriction base="xsd:unsignedInt"/>
+ </xsd:simpleType>
+
+ <xsd:complexType name="title">
+ <xsd:simpleContent>
+ <xsd:extension base="xsd:string">
+ <xsd:attribute name="lang" type="xsd:language"/>
+ </xsd:extension>
+ </xsd:simpleContent>
+ </xsd:complexType>
+
+ <xsd:simpleType name="genre">
+ <xsd:restriction base="xsd:string">
+ <xsd:enumeration value="romance"/>
+ <xsd:enumeration value="fiction"/>
+ <xsd:enumeration value="horror"/>
+ <xsd:enumeration value="history"/>
+ <xsd:enumeration value="philosophy"/>
+ </xsd:restriction>
+ </xsd:simpleType>
+
+ <xsd:complexType name="person">
+ <xsd:sequence>
+ <xsd:element name="name" type="xsd:string"/>
+ <xsd:element name="born" type="xsd:date"/>
+ <xsd:element name="died" type="xsd:date" minOccurs="0"/>
+ </xsd:sequence>
+ </xsd:complexType>
+
+ <xsd:complexType name="author">
+ <xsd:complexContent>
+ <xsd:extension base="lib:person">
+ <xsd:attribute name="recommends" type="xsd:IDREF" xse:refType="lib:book"/>
+ </xsd:extension>
+ </xsd:complexContent>
+ </xsd:complexType>
+
+ <xsd:complexType name="book">
+ <xsd:sequence>
+ <xsd:element name="isbn" type="lib:isbn"/>
+ <xsd:element name="title" type="lib:title"/>
+ <xsd:element name="genre" type="lib:genre"/>
+ <xsd:element name="author" type="lib:author" maxOccurs="unbounded"/>
+ </xsd:sequence>
+ <xsd:attribute name="available" type="xsd:boolean" default="true"/>
+ <xsd:attribute name="id" type="xsd:ID" use="required"/>
+ </xsd:complexType>
+
+ <xsd:complexType name="catalog">
+ <xsd:sequence>
+ <xsd:element name="book" type="lib:book" maxOccurs="unbounded"/>
+ </xsd:sequence>
+ </xsd:complexType>
+
+ <xsd:element name="catalog" type="lib:catalog"/>
+
+</xsd:schema>
diff --git a/examples/cxx/tree/secure/makefile b/examples/cxx/tree/secure/makefile
new file mode 100644
index 0000000..8615108
--- /dev/null
+++ b/examples/cxx/tree/secure/makefile
@@ -0,0 +1,104 @@
+# file : examples/cxx/tree/secure/makefile
+# copyright : Copyright (c) 2005-2014 Code Synthesis Tools CC
+# license : GNU GPL v2 + exceptions; see accompanying LICENSE file
+
+include $(dir $(lastword $(MAKEFILE_LIST)))../../../../build/bootstrap.make
+
+xsd := library.xsd
+cxx := driver.cxx secure-dom-parser.cxx
+
+obj := $(addprefix $(out_base)/,$(cxx:.cxx=.o) $(xsd:.xsd=.o))
+dep := $(obj:.o=.o.d)
+
+driver := $(out_base)/driver
+install := $(out_base)/.install
+dist := $(out_base)/.dist
+dist-win := $(out_base)/.dist-win
+clean := $(out_base)/.clean
+
+
+# Import the Xerces-C++ library (xerces_c.l) and its cpp-options.
+#
+$(call import,\
+ $(scf_root)/import/libxerces-c/stub.make,\
+ l: xerces_c.l,cpp-options: xerces_c.l.cpp-options)
+
+
+# Build.
+#
+$(driver): $(obj) $(xerces_c.l)
+
+$(obj) $(dep): cpp_options := -I$(out_base) -I$(src_base) -I$(src_root)/libxsd
+$(obj) $(dep): $(xerces_c.l.cpp-options)
+
+genf := $(xsd:.xsd=.hxx) $(xsd:.xsd=.ixx) $(xsd:.xsd=.cxx)
+gen := $(addprefix $(out_base)/,$(genf))
+
+$(gen): xsd := $(out_root)/xsd/xsd
+$(gen): $(out_root)/xsd/xsd
+
+$(call include-dep,$(dep),$(obj),$(gen))
+
+# Convenience alias for default target.
+#
+$(out_base)/: $(driver)
+
+
+# Install & Dist. Note that dist-win ships README as README.txt.
+#
+dist-common := $(out_base)/.dist-common
+
+$(install) $(dist) $(dist-win) $(dist-common): path := $(subst $(src_root)/,,$(src_base))
+
+$(install):
+ $(call install-data,$(src_base)/README,$(install_doc_dir)/xsd/$(path)/README)
+ $(call install-data,$(src_base)/driver.cxx,$(install_doc_dir)/xsd/$(path)/driver.cxx)
+ $(call install-data,$(src_base)/library.xsd,$(install_doc_dir)/xsd/$(path)/library.xsd)
+ $(call install-data,$(src_base)/library.xml,$(install_doc_dir)/xsd/$(path)/library.xml)
+ $(call install-data,$(src_base)/secure-dom-parser.hxx,$(install_doc_dir)/xsd/$(path)/secure-dom-parser.hxx)
+ $(call install-data,$(src_base)/secure-dom-parser.cxx,$(install_doc_dir)/xsd/$(path)/secure-dom-parser.cxx)
+
+$(dist-common):
+ $(call install-data,$(src_base)/driver.cxx,$(dist_prefix)/$(path)/driver.cxx)
+ $(call install-data,$(src_base)/library.xsd,$(dist_prefix)/$(path)/library.xsd)
+ $(call install-data,$(src_base)/library.xml,$(dist_prefix)/$(path)/library.xml)
+ $(call install-data,$(src_base)/secure-dom-parser.hxx,$(dist_prefix)/$(path)/secure-dom-parser.hxx)
+ $(call install-data,$(src_base)/secure-dom-parser.cxx,$(dist_prefix)/$(path)/secure-dom-parser.cxx)
+
+$(dist): $(dist-common)
+ $(call install-data,$(src_base)/README,$(dist_prefix)/$(path)/README)
+
+$(dist-win): $(dist-common)
+ $(call install-data,$(src_base)/README,$(dist_prefix)/$(path)/README.txt)
+ $(call message,,todos $(dist_prefix)/$(path)/README.txt)
+
+# Clean.
+#
+$(clean): $(driver).o.clean \
+ $(addsuffix .cxx.clean,$(obj)) \
+ $(addsuffix .cxx.clean,$(dep)) \
+ $(addprefix $(out_base)/,$(xsd:.xsd=.cxx.xsd.clean))
+
+# Generated .gitignore (only when out_base is the same as src_base).
+#
+ifeq ($(out_base),$(src_base))
+$(gen): | $(out_base)/.gitignore
+$(driver): | $(out_base)/.gitignore
+
+$(out_base)/.gitignore: files := driver $(genf)
+$(clean): $(out_base)/.gitignore.clean
+
+$(call include,$(bld_root)/git/gitignore.make)
+endif
+
+# How to.
+#
+$(call include,$(bld_root)/cxx/o-e.make)
+$(call include,$(bld_root)/cxx/cxx-o.make)
+$(call include,$(bld_root)/cxx/cxx-d.make)
+$(call include,$(bld_root)/install.make)
+$(call include,$(scf_root)/xsd/tree/xsd-cxx.make)
+
+# Dependencies (the xsd makefile; $(gen) depends on $(out_root)/xsd/xsd).
+#
+$(call import,$(src_root)/xsd/makefile)
diff --git a/examples/cxx/tree/secure/secure-dom-parser.cxx b/examples/cxx/tree/secure/secure-dom-parser.cxx
new file mode 100644
index 0000000..9008e1e
--- /dev/null
+++ b/examples/cxx/tree/secure/secure-dom-parser.cxx
@@ -0,0 +1,24 @@
+// file : examples/cxx/tree/secure/secure-dom-parser.cxx
+// copyright : not copyrighted - public domain
+
+#include "secure-dom-parser.hxx"
+
+#include <xercesc/util/XMLException.hpp>
+#include <xercesc/util/XMLExceptMsgs.hpp>
+
+using namespace xercesc;
+
+void SecureDOMParser::
+doctypeDecl (const DTDElementDecl& e,
+ const XMLCh* const pub_id,
+ const XMLCh* const sys_id,
+ const bool hasi, // DOCTYPE has an internal subset (has_internal in the header).
+ const bool hase) // DOCTYPE has an external subset (has_external in the header).
+{
+ if (hasi || hase) // Refuse any DTD subset by throwing a RuntimeException.
+ ThrowXMLwithMemMgr(RuntimeException,
+ XMLExcepts::Gen_NoDTDValidator,
+ fMemoryManager);
+ // A DOCTYPE without subsets is passed on to the base implementation.
+ DOMLSParserImpl::doctypeDecl (e, pub_id, sys_id, hasi, hase);
+}
diff --git a/examples/cxx/tree/secure/secure-dom-parser.hxx b/examples/cxx/tree/secure/secure-dom-parser.hxx
new file mode 100644
index 0000000..20783b8
--- /dev/null
+++ b/examples/cxx/tree/secure/secure-dom-parser.hxx
@@ -0,0 +1,25 @@
+// file : examples/cxx/tree/secure/secure-dom-parser.hxx
+// copyright : not copyrighted - public domain
+
+#ifndef SECURE_DOM_PARSER_HXX
+#define SECURE_DOM_PARSER_HXX
+
+#include <xercesc/parsers/DOMLSParserImpl.hpp>
+
+class SecureDOMParser: public xercesc::DOMLSParserImpl // Rejects DTD subsets.
+{
+public:
+ SecureDOMParser (xercesc::MemoryManager* mm =
+ xercesc::XMLPlatformUtils::fgMemoryManager,
+ xercesc::XMLGrammarPool* gp = 0)
+ : DOMLSParserImpl (0, mm, gp) {}
+ // Throws if the DOCTYPE has an internal or external subset (see the .cxx file).
+ virtual void
+ doctypeDecl (const xercesc::DTDElementDecl& root,
+ const XMLCh* const public_id,
+ const XMLCh* const system_id,
+ const bool has_internal,
+ const bool has_external);
+};
+
+#endif // SECURE_DOM_PARSER_HXX