From 8ffa44b96b668b0286aa73cc331b59d4cef38d48 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Thu, 10 Sep 2015 12:18:02 +0200 Subject: Add example of more secure XML parsing --- dist/examples/cxx/tree/makefile | 2 +- dist/examples/cxx/tree/secure/makefile | 28 +++++ examples/cxx/tree/README | 4 + examples/cxx/tree/makefile | 4 +- examples/cxx/tree/secure/README | 41 +++++++ examples/cxx/tree/secure/driver.cxx | 141 +++++++++++++++++++++++++ examples/cxx/tree/secure/library.xml | 59 +++++++++++ examples/cxx/tree/secure/library.xsd | 72 +++++++++++++ examples/cxx/tree/secure/makefile | 104 ++++++++++++++++++ examples/cxx/tree/secure/secure-dom-parser.cxx | 24 +++++ examples/cxx/tree/secure/secure-dom-parser.hxx | 25 +++++ 11 files changed, 501 insertions(+), 3 deletions(-) create mode 100644 dist/examples/cxx/tree/secure/makefile create mode 100644 examples/cxx/tree/secure/README create mode 100644 examples/cxx/tree/secure/driver.cxx create mode 100644 examples/cxx/tree/secure/library.xml create mode 100644 examples/cxx/tree/secure/library.xsd create mode 100644 examples/cxx/tree/secure/makefile create mode 100644 examples/cxx/tree/secure/secure-dom-parser.cxx create mode 100644 examples/cxx/tree/secure/secure-dom-parser.hxx diff --git a/dist/examples/cxx/tree/makefile b/dist/examples/cxx/tree/makefile index 44e7a0f..32b44de 100644 --- a/dist/examples/cxx/tree/makefile +++ b/dist/examples/cxx/tree/makefile @@ -1,5 +1,5 @@ dirs := binary caching embedded custom hello library messaging mixed \ -multiroot order performance polymorphism streaming wildcard +multiroot order performance polymorphism secure streaming wildcard ifeq ($(WITH_ZLIB),1) dirs += compression diff --git a/dist/examples/cxx/tree/secure/makefile b/dist/examples/cxx/tree/secure/makefile new file mode 100644 index 0000000..4898a10 --- /dev/null +++ b/dist/examples/cxx/tree/secure/makefile @@ -0,0 +1,28 @@ +root := ../../.. 
+ +include $(root)/build/cxx/rules.make +include $(root)/build/xsd/tree-rules.make + +# Build. +# +driver: driver.o secure-dom-parser.o library.o + +library.o: library.cxx library.hxx +secure-dom-parser.o: secure-dom-parser.cxx secure-dom-parser.hxx +driver.o: driver.cxx secure-dom-parser.hxx library.hxx + +library.cxx library.hxx: library.xsd + + +# Test +# +.PHONY: test +test: driver library.xml + ./driver library.xml + + +# Clean. +# +.PHONY: clean +clean: + rm -f library.o library.?xx secure-dom-parser.o driver.o driver diff --git a/examples/cxx/tree/README b/examples/cxx/tree/README index 569bf11..83ffcab 100644 --- a/examples/cxx/tree/README +++ b/examples/cxx/tree/README @@ -52,6 +52,10 @@ embedded into an application and then use it with the C++/Tree mapping to parse and validate XML documents. +secure + Shows how to perform more secure XML parsing by disabling the XML + External Entity (XXE) Processing. + performance Measures the performance of parsing and serialization. This example also shows how to structure your code to achieve the maximum diff --git a/examples/cxx/tree/makefile b/examples/cxx/tree/makefile index 172195a..353ad6d 100644 --- a/examples/cxx/tree/makefile +++ b/examples/cxx/tree/makefile @@ -5,11 +5,11 @@ include $(dir $(lastword $(MAKEFILE_LIST)))../../../build/bootstrap.make all_examples := binary caching embedded custom hello library messaging \ -mixed multiroot order performance polymorphism streaming wildcard \ +mixed multiroot order performance polymorphism secure streaming wildcard \ compression xpath build_examples := binary caching embedded custom hello library messaging \ -mixed multiroot order performance polymorphism streaming wildcard +mixed multiroot order performance polymorphism secure streaming wildcard ifeq ($(xsd_with_zlib),y) build_examples += compression diff --git a/examples/cxx/tree/secure/README b/examples/cxx/tree/secure/README new file mode 100644 index 0000000..649f0a3 --- /dev/null +++ 
b/examples/cxx/tree/secure/README @@ -0,0 +1,41 @@ +This example shows how to perform more secure XML parsing by disabling +the XML External Entity (XXE) Processing. If XML Schema validation is +used, then it would also make sense to pre-load the known schemas and +to disable loading of any external schemas, for example, via the +schemaLocation attribute found in the XML documents. See the comment +in driver.cxx for more information on how to achieve this. + +The example consists of the following files: + +library.xsd + XML Schema which describes a library of books. + +library.xml + Sample XML instance document. It includes (commented out) DOCTYPE + declarations with internal and external subsets that the parser + will refuse to process. + +library.hxx +library.cxx + C++ types that represent the given vocabulary and a set of parsing + functions that convert XML instance documents to a tree-like in-memory + object model. These are generated by the XSD compiler from library.xsd. + +secure-dom-parser.hxx +secure-dom-parser.cxx + A secure Xerces-C++ DOM parser implementation that disables processing + of internal/external DTD subsets. + +driver.cxx + Driver for the example. It first sets up the secure DOM parser. It then + parses the input file to a DOM document using the secure DOM parser and + calls one of the parsing functions that constructs the object model from + this DOM document. Finally, the driver prints the number of books in the + object model to STDERR. + +To run the example on the sample XML instance document simply execute: + +$ ./driver library.xml + +To verify that DTD processing is disabled, uncomment a different DOCTYPE +version in the sample document. 
diff --git a/examples/cxx/tree/secure/driver.cxx b/examples/cxx/tree/secure/driver.cxx new file mode 100644 index 0000000..2dfb3bf --- /dev/null +++ b/examples/cxx/tree/secure/driver.cxx @@ -0,0 +1,141 @@ +// file : examples/cxx/tree/secure/driver.cxx +// copyright : not copyrighted - public domain + +#include // std::auto_ptr +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include "library.hxx" +#include "secure-dom-parser.hxx" + +using namespace std; + +int +main (int argc, char* argv[]) +{ + if (argc != 2) + { + cerr << "usage: " << argv[0] << " library.xml" << endl; + return 1; + } + + int r (0); + + // We need to initialize the Xerces-C++ runtime because we + // are doing the XML-to-DOM parsing ourselves. + // + xercesc::XMLPlatformUtils::Initialize (); + + try + { + using namespace xercesc; + namespace xml = xsd::cxx::xml; + namespace tree = xsd::cxx::tree; + + xml::dom::auto_ptr parser (new SecureDOMParser ()); + + DOMConfiguration* conf (parser->getDomConfig ()); + + // Discard comment nodes in the document. + // + conf->setParameter (XMLUni::fgDOMComments, false); + + // Enable datatype normalization. + // + conf->setParameter (XMLUni::fgDOMDatatypeNormalization, true); + + // Do not create EntityReference nodes in the DOM tree. No + // EntityReference nodes will be created, only the nodes + // corresponding to their fully expanded substitution text + // will be created. + // + conf->setParameter (XMLUni::fgDOMEntities, false); + + // Perform namespace processing. + // + conf->setParameter (XMLUni::fgDOMNamespaces, true); + + // Do not include ignorable whitespace in the DOM tree. + // + conf->setParameter (XMLUni::fgDOMElementContentWhitespace, false); + + // Enable validation. 
+ // + conf->setParameter (XMLUni::fgDOMValidate, true); + conf->setParameter (XMLUni::fgXercesSchema, true); + conf->setParameter (XMLUni::fgXercesSchemaFullChecking, false); + + // Xerces-C++ 3.1.0 is the first version with working multi import + // support. + // +#if _XERCES_VERSION >= 30100 + conf->setParameter (XMLUni::fgXercesHandleMultipleImports, true); +#endif + + // Disable loading schemas via other means (e.g., schemaLocation). + // + // Note: this might be a good idea though if you need validation, + // you will need to pre-load the schema via other means. See the + // 'caching' and 'embedded' examples for different approaches. + // Both of them can be used with SecureDOMParser. + // + // conf->setParameter (XMLUni::fgXercesLoadSchema, false); + + // We will release the DOM document ourselves. + // + conf->setParameter (XMLUni::fgXercesUserAdoptsDOMDocument, true); + + // Set error handler. + // + tree::error_handler eh; + xml::dom::bits::error_handler_proxy ehp (eh); + conf->setParameter (XMLUni::fgDOMErrorHandler, &ehp); + + // Parse the XML document. + // + ifstream ifs; + ifs.exceptions (ifstream::badbit | ifstream::failbit); + ifs.open (argv[1]); + + // Wrap the standard input stream. + // + xml::sax::std_input_source isrc (ifs, argv[1]); + Wrapper4InputSource wrap (&isrc, false); + + // Parse XML to DOM. + // + xml_schema::dom::auto_ptr doc (parser->parse (&wrap)); + + eh.throw_if_failed (); + + // Parse DOM to the object model. 
+ // + auto_ptr c (library::catalog_ (*doc)); + + cerr << "catalog with " << c->book ().size () << " books" << endl; + } + catch (const xml_schema::exception& e) + { + cerr << e << endl; + r = 1; + } + catch (const std::ios_base::failure&) + { + cerr << argv[1] << ": unable to open or read failure" << endl; + r = 1; + } + + xercesc::XMLPlatformUtils::Terminate (); + return r; +} diff --git a/examples/cxx/tree/secure/library.xml b/examples/cxx/tree/secure/library.xml new file mode 100644 index 0000000..33a2041 --- /dev/null +++ b/examples/cxx/tree/secure/library.xml @@ -0,0 +1,59 @@ + + + + + + + + + + + 0679760806 + The Master and Margarita + fiction + + + Mikhail Bulgakov + 1891-05-15 + 1940-03-10 + + + + + + 0679600841 + War and Peace + history + + + Leo Tolstoy + 1828-09-09 + 1910-11-20 + + + + + + 0679420290 + Crime and Punishment + philosophy + + + Fyodor Dostoevsky + 1821-11-11 + 1881-02-09 + + + + diff --git a/examples/cxx/tree/secure/library.xsd b/examples/cxx/tree/secure/library.xsd new file mode 100644 index 0000000..afad24b --- /dev/null +++ b/examples/cxx/tree/secure/library.xsd @@ -0,0 +1,72 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/examples/cxx/tree/secure/makefile b/examples/cxx/tree/secure/makefile new file mode 100644 index 0000000..8615108 --- /dev/null +++ b/examples/cxx/tree/secure/makefile @@ -0,0 +1,104 @@ +# file : examples/cxx/tree/secure/makefile +# copyright : Copyright (c) 2005-2014 Code Synthesis Tools CC +# license : GNU GPL v2 + exceptions; see accompanying LICENSE file + +include $(dir $(lastword $(MAKEFILE_LIST)))../../../../build/bootstrap.make + +xsd := library.xsd +cxx := driver.cxx secure-dom-parser.cxx + +obj := $(addprefix $(out_base)/,$(cxx:.cxx=.o) $(xsd:.xsd=.o)) +dep := $(obj:.o=.o.d) + +driver := $(out_base)/driver +install := $(out_base)/.install +dist := $(out_base)/.dist +dist-win := $(out_base)/.dist-win +clean := 
$(out_base)/.clean + + +# Import. +# +$(call import,\ + $(scf_root)/import/libxerces-c/stub.make,\ + l: xerces_c.l,cpp-options: xerces_c.l.cpp-options) + + +# Build. +# +$(driver): $(obj) $(xerces_c.l) + +$(obj) $(dep): cpp_options := -I$(out_base) -I$(src_base) -I$(src_root)/libxsd +$(obj) $(dep): $(xerces_c.l.cpp-options) + +genf := $(xsd:.xsd=.hxx) $(xsd:.xsd=.ixx) $(xsd:.xsd=.cxx) +gen := $(addprefix $(out_base)/,$(genf)) + +$(gen): xsd := $(out_root)/xsd/xsd +$(gen): $(out_root)/xsd/xsd + +$(call include-dep,$(dep),$(obj),$(gen)) + +# Convenience alias for default target. +# +$(out_base)/: $(driver) + + +# Install & Dist. +# +dist-common := $(out_base)/.dist-common + +$(install) $(dist) $(dist-win) $(dist-common): path := $(subst $(src_root)/,,$(src_base)) + +$(install): + $(call install-data,$(src_base)/README,$(install_doc_dir)/xsd/$(path)/README) + $(call install-data,$(src_base)/driver.cxx,$(install_doc_dir)/xsd/$(path)/driver.cxx) + $(call install-data,$(src_base)/library.xsd,$(install_doc_dir)/xsd/$(path)/library.xsd) + $(call install-data,$(src_base)/library.xml,$(install_doc_dir)/xsd/$(path)/library.xml) + $(call install-data,$(src_base)/secure-dom-parser.hxx,$(install_doc_dir)/xsd/$(path)/secure-dom-parser.hxx) + $(call install-data,$(src_base)/secure-dom-parser.cxx,$(install_doc_dir)/xsd/$(path)/secure-dom-parser.cxx) + +$(dist-common): + $(call install-data,$(src_base)/driver.cxx,$(dist_prefix)/$(path)/driver.cxx) + $(call install-data,$(src_base)/library.xsd,$(dist_prefix)/$(path)/library.xsd) + $(call install-data,$(src_base)/library.xml,$(dist_prefix)/$(path)/library.xml) + $(call install-data,$(src_base)/secure-dom-parser.hxx,$(dist_prefix)/$(path)/secure-dom-parser.hxx) + $(call install-data,$(src_base)/secure-dom-parser.cxx,$(dist_prefix)/$(path)/secure-dom-parser.cxx) + +$(dist): $(dist-common) + $(call install-data,$(src_base)/README,$(dist_prefix)/$(path)/README) + +$(dist-win): $(dist-common) + $(call 
install-data,$(src_base)/README,$(dist_prefix)/$(path)/README.txt) + $(call message,,todos $(dist_prefix)/$(path)/README.txt) + +# Clean. +# +$(clean): $(driver).o.clean \ + $(addsuffix .cxx.clean,$(obj)) \ + $(addsuffix .cxx.clean,$(dep)) \ + $(addprefix $(out_base)/,$(xsd:.xsd=.cxx.xsd.clean)) + +# Generated .gitignore. +# +ifeq ($(out_base),$(src_base)) +$(gen): | $(out_base)/.gitignore +$(driver): | $(out_base)/.gitignore + +$(out_base)/.gitignore: files := driver $(genf) +$(clean): $(out_base)/.gitignore.clean + +$(call include,$(bld_root)/git/gitignore.make) +endif + +# How to. +# +$(call include,$(bld_root)/cxx/o-e.make) +$(call include,$(bld_root)/cxx/cxx-o.make) +$(call include,$(bld_root)/cxx/cxx-d.make) +$(call include,$(bld_root)/install.make) +$(call include,$(scf_root)/xsd/tree/xsd-cxx.make) + +# Dependencies. +# +$(call import,$(src_root)/xsd/makefile) diff --git a/examples/cxx/tree/secure/secure-dom-parser.cxx b/examples/cxx/tree/secure/secure-dom-parser.cxx new file mode 100644 index 0000000..9008e1e --- /dev/null +++ b/examples/cxx/tree/secure/secure-dom-parser.cxx @@ -0,0 +1,24 @@ +// file : examples/cxx/tree/secure/secure-dom-parser.cxx +// copyright : not copyrighted - public domain + +#include "secure-dom-parser.hxx" + +#include +#include + +using namespace xercesc; + +void SecureDOMParser:: +doctypeDecl (const DTDElementDecl& e, + const XMLCh* const pub_id, + const XMLCh* const sys_id, + const bool hasi, + const bool hase) +{ + if (hasi || hase) + ThrowXMLwithMemMgr(RuntimeException, + XMLExcepts::Gen_NoDTDValidator, + fMemoryManager); + + DOMLSParserImpl::doctypeDecl (e, pub_id, sys_id, hasi, hase); +} diff --git a/examples/cxx/tree/secure/secure-dom-parser.hxx b/examples/cxx/tree/secure/secure-dom-parser.hxx new file mode 100644 index 0000000..20783b8 --- /dev/null +++ b/examples/cxx/tree/secure/secure-dom-parser.hxx @@ -0,0 +1,25 @@ +// file : examples/cxx/tree/secure/secure-dom-parser.hxx +// copyright : not copyrighted - public domain + 
+#ifndef SECURE_DOM_PARSER_HXX +#define SECURE_DOM_PARSER_HXX + +#include + +class SecureDOMParser: public xercesc::DOMLSParserImpl +{ +public: + SecureDOMParser (xercesc::MemoryManager* mm = + xercesc::XMLPlatformUtils::fgMemoryManager, + xercesc::XMLGrammarPool* gp = 0) + : DOMLSParserImpl (0, mm, gp) {} + + virtual void + doctypeDecl (const xercesc::DTDElementDecl& root, + const XMLCh* const public_id, + const XMLCh* const system_id, + const bool has_internal, + const bool has_external); +}; + +#endif // SECURE_DOM_PARSER_HXX -- cgit v1.1