diff options
author | Boris Kolpackov <boris@codesynthesis.com> | 2015-09-10 12:18:02 +0200 |
---|---|---|
committer | Boris Kolpackov <boris@codesynthesis.com> | 2015-09-10 12:18:02 +0200 |
commit | 8ffa44b96b668b0286aa73cc331b59d4cef38d48 (patch) | |
tree | f877fdad915719673606aff7b8b7e693c7baf376 /examples/cxx/tree | |
parent | aabb88ef1fc3ada75b7210b73c05e73af5c653a8 (diff) |
Add example of more secure XML parsing
Diffstat (limited to 'examples/cxx/tree')
-rw-r--r-- | examples/cxx/tree/README | 4 | ||||
-rw-r--r-- | examples/cxx/tree/makefile | 4 | ||||
-rw-r--r-- | examples/cxx/tree/secure/README | 41 | ||||
-rw-r--r-- | examples/cxx/tree/secure/driver.cxx | 141 | ||||
-rw-r--r-- | examples/cxx/tree/secure/library.xml | 59 | ||||
-rw-r--r-- | examples/cxx/tree/secure/library.xsd | 72 | ||||
-rw-r--r-- | examples/cxx/tree/secure/makefile | 104 | ||||
-rw-r--r-- | examples/cxx/tree/secure/secure-dom-parser.cxx | 24 | ||||
-rw-r--r-- | examples/cxx/tree/secure/secure-dom-parser.hxx | 25 |
9 files changed, 472 insertions, 2 deletions
diff --git a/examples/cxx/tree/README b/examples/cxx/tree/README index 569bf11..83ffcab 100644 --- a/examples/cxx/tree/README +++ b/examples/cxx/tree/README @@ -52,6 +52,10 @@ embedded into an application and then use it with the C++/Tree mapping to parse and validate XML documents. +secure + Shows how to perform more secure XML parsing by disabling the XML + External Entity (XXE) Processing. + performance Measures the performance of parsing and serialization. This example also shows how to structure your code to achieve the maximum diff --git a/examples/cxx/tree/makefile b/examples/cxx/tree/makefile index 172195a..353ad6d 100644 --- a/examples/cxx/tree/makefile +++ b/examples/cxx/tree/makefile @@ -5,11 +5,11 @@ include $(dir $(lastword $(MAKEFILE_LIST)))../../../build/bootstrap.make all_examples := binary caching embedded custom hello library messaging \ -mixed multiroot order performance polymorphism streaming wildcard \ +mixed multiroot order performance polymorphism secure streaming wildcard \ compression xpath build_examples := binary caching embedded custom hello library messaging \ -mixed multiroot order performance polymorphism streaming wildcard +mixed multiroot order performance polymorphism secure streaming wildcard ifeq ($(xsd_with_zlib),y) build_examples += compression diff --git a/examples/cxx/tree/secure/README b/examples/cxx/tree/secure/README new file mode 100644 index 0000000..649f0a3 --- /dev/null +++ b/examples/cxx/tree/secure/README @@ -0,0 +1,41 @@ +This example shows how to perform more secure XML parsing by disabling +the XML External Entity (XXE) Processing. If XML Schema validation is +used, then it would also make sense to pre-load the known schemas and +to disable loading of any external schemas, for example, via the +schemaLocation attribute found in the XML documents. See the comment +in driver.cxx for more information on how to achieve this. 
+ +The example consists of the following files: + +library.xsd + XML Schema which describes a library of books. + +library.xml + Sample XML instance document. It includes (commented out) DOCTYPE + declarations with internal and external subsets that the parser + will refuse to process. + +library.hxx +library.cxx + C++ types that represent the given vocabulary and a set of parsing + functions that convert XML instance documents to a tree-like in-memory + object model. These are generated by the XSD compiler from library.xsd. + +secure-dom-parser.hxx +secure-dom-parser.cxx + A secure Xerces-C++ DOM parser implementation that disables processing + of internal/external DTD subsets. + +driver.cxx + Driver for the example. It first sets up the secure DOM parser. It then + parses the input file to a DOM document using the secure DOM parser and + calls one of the parsing functions that constructs the object model from + this DOM document. Finally, the driver prints the number of books in the + object model to STDERR. + +To run the example on the sample XML instance document simply execute: + +$ ./driver library.xml + +To verify that DTD processing is disabled, uncomment a different DOCTYPE +version in the sample document. 
diff --git a/examples/cxx/tree/secure/driver.cxx b/examples/cxx/tree/secure/driver.cxx new file mode 100644 index 0000000..2dfb3bf --- /dev/null +++ b/examples/cxx/tree/secure/driver.cxx @@ -0,0 +1,141 @@ +// file : examples/cxx/tree/secure/driver.cxx +// copyright : not copyrighted - public domain + +#include <memory> // std::auto_ptr +#include <fstream> +#include <iostream> + +#include <xercesc/dom/DOM.hpp> +#include <xercesc/util/PlatformUtils.hpp> +#include <xercesc/framework/Wrapper4InputSource.hpp> + +#include <xsd/cxx/xml/string.hxx> +#include <xsd/cxx/xml/dom/auto-ptr.hxx> +#include <xsd/cxx/xml/dom/bits/error-handler-proxy.hxx> +#include <xsd/cxx/xml/sax/std-input-source.hxx> + +#include <xsd/cxx/tree/error-handler.hxx> + +#include "library.hxx" +#include "secure-dom-parser.hxx" + +using namespace std; + +int +main (int argc, char* argv[]) +{ + if (argc != 2) + { + cerr << "usage: " << argv[0] << " library.xml" << endl; + return 1; + } + + int r (0); + + // We need to initialize the Xerces-C++ runtime because we + // are doing the XML-to-DOM parsing ourselves. + // + xercesc::XMLPlatformUtils::Initialize (); + + try + { + using namespace xercesc; + namespace xml = xsd::cxx::xml; + namespace tree = xsd::cxx::tree; + + xml::dom::auto_ptr<DOMLSParser> parser (new SecureDOMParser ()); + + DOMConfiguration* conf (parser->getDomConfig ()); + + // Discard comment nodes in the document. + // + conf->setParameter (XMLUni::fgDOMComments, false); + + // Enable datatype normalization. + // + conf->setParameter (XMLUni::fgDOMDatatypeNormalization, true); + + // Do not create EntityReference nodes in the DOM tree. No + // EntityReference nodes will be created, only the nodes + // corresponding to their fully expanded substitution text + // will be created. + // + conf->setParameter (XMLUni::fgDOMEntities, false); + + // Perform namespace processing. + // + conf->setParameter (XMLUni::fgDOMNamespaces, true); + + // Do not include ignorable whitespace in the DOM tree. 
+ // + conf->setParameter (XMLUni::fgDOMElementContentWhitespace, false); + + // Enable validation. + // + conf->setParameter (XMLUni::fgDOMValidate, true); + conf->setParameter (XMLUni::fgXercesSchema, true); + conf->setParameter (XMLUni::fgXercesSchemaFullChecking, false); + + // Xerces-C++ 3.1.0 is the first version with working multi import + // support. + // +#if _XERCES_VERSION >= 30100 + conf->setParameter (XMLUni::fgXercesHandleMultipleImports, true); +#endif + + // Disable loading schemas via other means (e.g., schemaLocation). + // + // Note: this might be a good idea though if you need validation, + // you will need to pre-load the schema via other means. See the + // 'caching' and 'embedded' examples for different approaches. + // Both of them can be used with SecureDOMParser. + // + // conf->setParameter (XMLUni::fgXercesLoadSchema, false); + + // We will release the DOM document ourselves. + // + conf->setParameter (XMLUni::fgXercesUserAdoptsDOMDocument, true); + + // Set error handler. + // + tree::error_handler<char> eh; + xml::dom::bits::error_handler_proxy<char> ehp (eh); + conf->setParameter (XMLUni::fgDOMErrorHandler, &ehp); + + // Parse the XML document. + // + ifstream ifs; + ifs.exceptions (ifstream::badbit | ifstream::failbit); + ifs.open (argv[1]); + + // Wrap the standard input stream. + // + xml::sax::std_input_source isrc (ifs, argv[1]); + Wrapper4InputSource wrap (&isrc, false); + + // Parse XML to DOM. + // + xml_schema::dom::auto_ptr<DOMDocument> doc (parser->parse (&wrap)); + + eh.throw_if_failed<xml_schema::parsing> (); + + // Parse DOM to the object model. 
+ // + auto_ptr<library::catalog> c (library::catalog_ (*doc)); + + cerr << "catalog with " << c->book ().size () << " books" << endl; + } + catch (const xml_schema::exception& e) + { + cerr << e << endl; + r = 1; + } + catch (const std::ios_base::failure&) + { + cerr << argv[1] << ": unable to open or read failure" << endl; + r = 1; + } + + xercesc::XMLPlatformUtils::Terminate (); + return r; +} diff --git a/examples/cxx/tree/secure/library.xml b/examples/cxx/tree/secure/library.xml new file mode 100644 index 0000000..33a2041 --- /dev/null +++ b/examples/cxx/tree/secure/library.xml @@ -0,0 +1,59 @@ +<?xml version="1.0"?> +<!DOCTYPE lib:catalog> +<!-- +<!DOCTYPE lib:catalog [<!ENTITY xxe SYSTEM "file:///dev/random" >]> +--> +<!-- +<!DOCTYPE lib:catalog PUBLIC "public id" "http://example.org"> +--> + +<!-- + +file : examples/cxx/tree/secure/library.xml +copyright : not copyrighted - public domain + +--> + +<lib:catalog xmlns:lib="http://www.codesynthesis.com/library" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://www.codesynthesis.com/library library.xsd"> + + <book id="MM" available="false"> + <isbn>0679760806</isbn> + <title>The Master and Margarita</title> + <genre>fiction</genre> + + <author recommends="WP"> + <name>Mikhail Bulgakov</name> + <born>1891-05-15</born> + <died>1940-03-10</died> + </author> + </book> + + + <book id="WP"> + <isbn>0679600841</isbn> + <title>War and Peace</title> + <genre>history</genre> + + <author recommends="CP"> + <name>Leo Tolstoy</name> + <born>1828-09-09</born> + <died>1910-11-20</died> + </author> + </book> + + + <book id="CP" available="false"> + <isbn>0679420290</isbn> + <title>Crime and Punishment</title> + <genre>philosophy</genre> + + <author> + <name>Fyodor Dostoevsky</name> + <born>1821-11-11</born> + <died>1881-02-09</died> + </author> + </book> + +</lib:catalog> diff --git a/examples/cxx/tree/secure/library.xsd b/examples/cxx/tree/secure/library.xsd new file mode 100644 index 
0000000..afad24b --- /dev/null +++ b/examples/cxx/tree/secure/library.xsd @@ -0,0 +1,72 @@ +<?xml version="1.0"?> + +<!-- + +file : examples/cxx/tree/secure/library.xsd +copyright : not copyrighted - public domain + +--> + +<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" + xmlns:xse="http://www.codesynthesis.com/xmlns/xml-schema-extension" + xmlns:lib="http://www.codesynthesis.com/library" + targetNamespace="http://www.codesynthesis.com/library"> + + <xsd:simpleType name="isbn"> + <xsd:restriction base="xsd:unsignedInt"/> + </xsd:simpleType> + + <xsd:complexType name="title"> + <xsd:simpleContent> + <xsd:extension base="xsd:string"> + <xsd:attribute name="lang" type="xsd:language"/> + </xsd:extension> + </xsd:simpleContent> + </xsd:complexType> + + <xsd:simpleType name="genre"> + <xsd:restriction base="xsd:string"> + <xsd:enumeration value="romance"/> + <xsd:enumeration value="fiction"/> + <xsd:enumeration value="horror"/> + <xsd:enumeration value="history"/> + <xsd:enumeration value="philosophy"/> + </xsd:restriction> + </xsd:simpleType> + + <xsd:complexType name="person"> + <xsd:sequence> + <xsd:element name="name" type="xsd:string"/> + <xsd:element name="born" type="xsd:date"/> + <xsd:element name="died" type="xsd:date" minOccurs="0"/> + </xsd:sequence> + </xsd:complexType> + + <xsd:complexType name="author"> + <xsd:complexContent> + <xsd:extension base="lib:person"> + <xsd:attribute name="recommends" type="xsd:IDREF" xse:refType="lib:book"/> + </xsd:extension> + </xsd:complexContent> + </xsd:complexType> + + <xsd:complexType name="book"> + <xsd:sequence> + <xsd:element name="isbn" type="lib:isbn"/> + <xsd:element name="title" type="lib:title"/> + <xsd:element name="genre" type="lib:genre"/> + <xsd:element name="author" type="lib:author" maxOccurs="unbounded"/> + </xsd:sequence> + <xsd:attribute name="available" type="xsd:boolean" default="true"/> + <xsd:attribute name="id" type="xsd:ID" use="required"/> + </xsd:complexType> + + <xsd:complexType 
name="catalog"> + <xsd:sequence> + <xsd:element name="book" type="lib:book" maxOccurs="unbounded"/> + </xsd:sequence> + </xsd:complexType> + + <xsd:element name="catalog" type="lib:catalog"/> + +</xsd:schema> diff --git a/examples/cxx/tree/secure/makefile b/examples/cxx/tree/secure/makefile new file mode 100644 index 0000000..8615108 --- /dev/null +++ b/examples/cxx/tree/secure/makefile @@ -0,0 +1,104 @@ +# file : examples/cxx/tree/secure/makefile +# copyright : Copyright (c) 2005-2014 Code Synthesis Tools CC +# license : GNU GPL v2 + exceptions; see accompanying LICENSE file + +include $(dir $(lastword $(MAKEFILE_LIST)))../../../../build/bootstrap.make + +xsd := library.xsd +cxx := driver.cxx secure-dom-parser.cxx + +obj := $(addprefix $(out_base)/,$(cxx:.cxx=.o) $(xsd:.xsd=.o)) +dep := $(obj:.o=.o.d) + +driver := $(out_base)/driver +install := $(out_base)/.install +dist := $(out_base)/.dist +dist-win := $(out_base)/.dist-win +clean := $(out_base)/.clean + + +# Import. +# +$(call import,\ + $(scf_root)/import/libxerces-c/stub.make,\ + l: xerces_c.l,cpp-options: xerces_c.l.cpp-options) + + +# Build. +# +$(driver): $(obj) $(xerces_c.l) + +$(obj) $(dep): cpp_options := -I$(out_base) -I$(src_base) -I$(src_root)/libxsd +$(obj) $(dep): $(xerces_c.l.cpp-options) + +genf := $(xsd:.xsd=.hxx) $(xsd:.xsd=.ixx) $(xsd:.xsd=.cxx) +gen := $(addprefix $(out_base)/,$(genf)) + +$(gen): xsd := $(out_root)/xsd/xsd +$(gen): $(out_root)/xsd/xsd + +$(call include-dep,$(dep),$(obj),$(gen)) + +# Convenience alias for default target. +# +$(out_base)/: $(driver) + + +# Install & Dist. 
+# +dist-common := $(out_base)/.dist-common + +$(install) $(dist) $(dist-win) $(dist-common): path := $(subst $(src_root)/,,$(src_base)) + +$(install): + $(call install-data,$(src_base)/README,$(install_doc_dir)/xsd/$(path)/README) + $(call install-data,$(src_base)/driver.cxx,$(install_doc_dir)/xsd/$(path)/driver.cxx) + $(call install-data,$(src_base)/library.xsd,$(install_doc_dir)/xsd/$(path)/library.xsd) + $(call install-data,$(src_base)/library.xml,$(install_doc_dir)/xsd/$(path)/library.xml) + $(call install-data,$(src_base)/secure-dom-parser.hxx,$(install_doc_dir)/xsd/$(path)/secure-dom-parser.hxx) + $(call install-data,$(src_base)/secure-dom-parser.cxx,$(install_doc_dir)/xsd/$(path)/secure-dom-parser.cxx) + +$(dist-common): + $(call install-data,$(src_base)/driver.cxx,$(dist_prefix)/$(path)/driver.cxx) + $(call install-data,$(src_base)/library.xsd,$(dist_prefix)/$(path)/library.xsd) + $(call install-data,$(src_base)/library.xml,$(dist_prefix)/$(path)/library.xml) + $(call install-data,$(src_base)/secure-dom-parser.hxx,$(dist_prefix)/$(path)/secure-dom-parser.hxx) + $(call install-data,$(src_base)/secure-dom-parser.cxx,$(dist_prefix)/$(path)/secure-dom-parser.cxx) + +$(dist): $(dist-common) + $(call install-data,$(src_base)/README,$(dist_prefix)/$(path)/README) + +$(dist-win): $(dist-common) + $(call install-data,$(src_base)/README,$(dist_prefix)/$(path)/README.txt) + $(call message,,todos $(dist_prefix)/$(path)/README.txt) + +# Clean. +# +$(clean): $(driver).o.clean \ + $(addsuffix .cxx.clean,$(obj)) \ + $(addsuffix .cxx.clean,$(dep)) \ + $(addprefix $(out_base)/,$(xsd:.xsd=.cxx.xsd.clean)) + +# Generated .gitignore. +# +ifeq ($(out_base),$(src_base)) +$(gen): | $(out_base)/.gitignore +$(driver): | $(out_base)/.gitignore + +$(out_base)/.gitignore: files := driver $(genf) +$(clean): $(out_base)/.gitignore.clean + +$(call include,$(bld_root)/git/gitignore.make) +endif + +# How to. 
+# +$(call include,$(bld_root)/cxx/o-e.make) +$(call include,$(bld_root)/cxx/cxx-o.make) +$(call include,$(bld_root)/cxx/cxx-d.make) +$(call include,$(bld_root)/install.make) +$(call include,$(scf_root)/xsd/tree/xsd-cxx.make) + +# Dependencies. +# +$(call import,$(src_root)/xsd/makefile) diff --git a/examples/cxx/tree/secure/secure-dom-parser.cxx b/examples/cxx/tree/secure/secure-dom-parser.cxx new file mode 100644 index 0000000..9008e1e --- /dev/null +++ b/examples/cxx/tree/secure/secure-dom-parser.cxx @@ -0,0 +1,24 @@ +// file : examples/cxx/tree/secure/secure-dom-parser.cxx +// copyright : not copyrighted - public domain + +#include "secure-dom-parser.hxx" + +#include <xercesc/util/XMLException.hpp> +#include <xercesc/util/XMLExceptMsgs.hpp> + +using namespace xercesc; + +void SecureDOMParser:: +doctypeDecl (const DTDElementDecl& e, + const XMLCh* const pub_id, + const XMLCh* const sys_id, + const bool hasi, + const bool hase) +{ + if (hasi || hase) + ThrowXMLwithMemMgr(RuntimeException, + XMLExcepts::Gen_NoDTDValidator, + fMemoryManager); + + DOMLSParserImpl::doctypeDecl (e, pub_id, sys_id, hasi, hase); +} diff --git a/examples/cxx/tree/secure/secure-dom-parser.hxx b/examples/cxx/tree/secure/secure-dom-parser.hxx new file mode 100644 index 0000000..20783b8 --- /dev/null +++ b/examples/cxx/tree/secure/secure-dom-parser.hxx @@ -0,0 +1,25 @@ +// file : examples/cxx/tree/secure/secure-dom-parser.hxx +// copyright : not copyrighted - public domain + +#ifndef SECURE_DOM_PARSER_HXX +#define SECURE_DOM_PARSER_HXX + +#include <xercesc/parsers/DOMLSParserImpl.hpp> + +class SecureDOMParser: public xercesc::DOMLSParserImpl +{ +public: + SecureDOMParser (xercesc::MemoryManager* mm = + xercesc::XMLPlatformUtils::fgMemoryManager, + xercesc::XMLGrammarPool* gp = 0) + : DOMLSParserImpl (0, mm, gp) {} + + virtual void + doctypeDecl (const xercesc::DTDElementDecl& root, + const XMLCh* const public_id, + const XMLCh* const system_id, + const bool has_internal, + const bool 
has_external); +}; + +#endif // SECURE_DOM_PARSER_HXX |