From 9553149aa6b6561c49981adf2848607a43765054 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Mon, 11 Oct 2010 12:43:48 +0200 Subject: Support for validation of the pattern facet New test: hybrid/pattern. --- NEWS | 12 + build/configuration.make | 2 + build/configure | 7 + dist/build/config.make | 1 + dist/config/config.make | 9 + dist/config/config.nmake | 9 + dist/etc/evc-4.0/config.nmake | 9 + dist/etc/integrity/config.make | 9 + dist/etc/iphone/config-device.make | 9 + dist/etc/iphone/config-simulator.make | 9 + dist/etc/lynxos/config-4.2.make | 9 + dist/etc/lynxos/config-5.0.make | 9 + dist/etc/qnx/config-6.3-gcc-2.95.make | 9 + dist/etc/qnx/config-6.3-gcc-3.3.make | 9 + dist/etc/qnx/config-6.4.make | 9 + dist/etc/vc-8.0/config-max.nmake | 9 + dist/etc/vc-8.0/config-min.nmake | 9 + dist/etc/vc-9.0/config-max.nmake | 9 + dist/etc/vc-9.0/config-min.nmake | 9 + dist/etc/vxworks/config-5.5.1.make | 9 + dist/etc/vxworks/config-6.4-max.make | 9 + dist/etc/vxworks/config-6.4-min.make | 9 + dist/etc/vxworks/config-6.7-max.make | 9 + dist/etc/vxworks/config-6.7-min.make | 9 + dist/libxsde/xsde/makefile | 9 + dist/libxsde/xsde/nmakefile | 11 +- dist/tests/cxx/hybrid/makefile | 2 +- dist/tests/cxx/hybrid/nmakefile | 2 +- libxsde/xsde/c/regexp/COPYING | 40 + libxsde/xsde/c/regexp/chvalid.c | 388 ++ libxsde/xsde/c/regexp/chvalid.h | 63 + libxsde/xsde/c/regexp/xmlregexp.c | 4375 ++++++++++++++++++++ libxsde/xsde/c/regexp/xmlregexp.h | 39 + libxsde/xsde/c/regexp/xmlunicode.c | 3172 ++++++++++++++ libxsde/xsde/c/regexp/xmlunicode.h | 195 + .../xsde/cxx/parser/validating/string-common.cxx | 43 + .../cxx/parser/validating/xml-schema-pskel.hxx | 24 + .../cxx/parser/validating/xml-schema-pskel.ixx | 27 + libxsde/xsde/cxx/schema-error.cxx | 1 + libxsde/xsde/cxx/schema-error.hxx | 1 + .../cxx/serializer/validating/string-common.cxx | 50 +- .../cxx/serializer/validating/xml-schema-sskel.hxx | 25 + .../cxx/serializer/validating/xml-schema-sskel.ixx | 27 + libxsde/xsde/makefile | 16 + tests/cxx/hybrid/makefile | 4 +- tests/cxx/hybrid/pattern/driver.cxx | 56 + tests/cxx/hybrid/pattern/makefile | 108 + tests/cxx/hybrid/pattern/test-000.std | 11 + tests/cxx/hybrid/pattern/test-000.xml | 13 + tests/cxx/hybrid/pattern/test.xsd | 53 + xsde/cxx/parser/elements.cxx | 32 +- xsde/cxx/parser/parser-inline.cxx | 107 +- xsde/cxx/serializer/elements.cxx | 36 +- xsde/cxx/serializer/serializer-inline.cxx | 81 +- 54 files changed, 9115 insertions(+), 98 deletions(-) create mode 100644 libxsde/xsde/c/regexp/COPYING create mode 100644 libxsde/xsde/c/regexp/chvalid.c create mode 100644 libxsde/xsde/c/regexp/chvalid.h create mode 100644 libxsde/xsde/c/regexp/xmlregexp.c create mode 100644 libxsde/xsde/c/regexp/xmlregexp.h create mode 100644 libxsde/xsde/c/regexp/xmlunicode.c create mode 100644 libxsde/xsde/c/regexp/xmlunicode.h create mode 100644 tests/cxx/hybrid/pattern/driver.cxx create mode 100644 tests/cxx/hybrid/pattern/makefile create mode 100644 tests/cxx/hybrid/pattern/test-000.std create mode 100644 tests/cxx/hybrid/pattern/test-000.xml create mode 100644 tests/cxx/hybrid/pattern/test.xsd diff --git a/NEWS b/NEWS index 08c82ca..44f06c9 100644 --- a/NEWS +++ b/NEWS @@ -66,6 +66,18 @@ Version 3.2.0 automatically converted to equivalent enumeration types with a union of all the member's enumerators. + C++/Parser + + * Support for validation of the pattern XML Schema facet on string-base + types. See the XSDE_REGEXP parameter in the configuration files for + details. + + C++/Serializer + + * Support for validation of the pattern XML Schema facet on string-base + types. See the XSDE_REGEXP parameter in the configuration files for + details. + Version 3.1.0 C++/Hybrid diff --git a/build/configuration.make b/build/configuration.make index 0de6d47..cea0216 100644 --- a/build/configuration.make +++ b/build/configuration.make @@ -19,6 +19,7 @@ xsde_longlong := xsde_snprintf := xsde_parser_validation := xsde_serializer_validation := +xsde_regexp := xsde_reuse_style := xsde_custom_allocator := xsde_default_allocator := @@ -47,6 +48,7 @@ $(out_root)/%: xsde_longlong := $(xsde_longlong) $(out_root)/%: xsde_snprintf := $(xsde_snprintf) $(out_root)/%: xsde_parser_validation := $(xsde_parser_validation) $(out_root)/%: xsde_serializer_validation := $(xsde_serializer_validation) +$(out_root)/%: xsde_regexp := $(xsde_regexp) $(out_root)/%: xsde_reuse_style := $(xsde_reuse_style) $(out_root)/%: xsde_custom_allocator := $(xsde_custom_allocator) $(out_root)/%: xsde_default_allocator := $(xsde_default_allocator) diff --git a/build/configure b/build/configure index 7e37c13..7f22df9 100755 --- a/build/configure +++ b/build/configure @@ -97,6 +97,12 @@ $echo serializer_validation=`read_y_n y` $echo +$echo "Would you like to include regexp support for xs:pattern validation?" +$echo + +regexp=`read_y_n y` + +$echo $echo "Please select the base parser/serializer reuse style you would" $echo "like to use:" $echo @@ -197,6 +203,7 @@ echo "xsde_longlong := $longlong" >> echo "xsde_snprintf := $snprintf" >>$1 echo "xsde_parser_validation := $parser_validation" >>$1 echo "xsde_serializer_validation := $serializer_validation" >>$1 +echo "xsde_regexp := $regexp" >>$1 echo "xsde_reuse_style := $reuse_style" >>$1 echo "xsde_custom_allocator := $allocator" >>$1 echo "xsde_default_allocator := $allocator_default" >>$1 diff --git a/dist/build/config.make b/dist/build/config.make index 33b6a8e..e7e9aaa 100644 --- a/dist/build/config.make +++ b/dist/build/config.make @@ -23,6 +23,7 @@ XSDE_LONGLONG := $(strip $(XSDE_LONGLONG)) XSDE_SNPRINTF := $(strip $(XSDE_SNPRINTF)) XSDE_PARSER_VALIDATION := $(strip $(XSDE_PARSER_VALIDATION)) XSDE_SERIALIZER_VALIDATION := $(strip $(XSDE_SERIALIZER_VALIDATION)) +XSDE_REGEXP := $(strip $(XSDE_REGEXP)) XSDE_REUSE_STYLE := $(strip $(XSDE_REUSE_STYLE)) XSDE_CUSTOM_ALLOCATOR := $(strip $(XSDE_CUSTOM_ALLOCATOR)) XSDE_DEFAULT_ALLOCATOR := $(strip $(XSDE_DEFAULT_ALLOCATOR)) diff --git a/dist/config/config.make b/dist/config/config.make index 95dc46f..426384e 100644 --- a/dist/config/config.make +++ b/dist/config/config.make @@ -108,6 +108,15 @@ XSDE_PARSER_VALIDATION := y XSDE_SERIALIZER_VALIDATION := y +# Set to 'y' if you would like to have support for regular expressions in +# the XSD/e runtime. If the regexp support is enabled, then the parser and +# serializer validation code will use it to validate the xs:pattern facet. +# If the regexp support is disabled, then this facet will be ignored. The +# regexp support increases the resulting executable size by about 30-50Kb. +# +XSDE_REGEXP := n + + # Base parser/serializer implementation reuse style. Valid values are: # # 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin) diff --git a/dist/config/config.nmake b/dist/config/config.nmake index 3e23c25..24a267f 100644 --- a/dist/config/config.nmake +++ b/dist/config/config.nmake @@ -107,6 +107,15 @@ XSDE_PARSER_VALIDATION = y XSDE_SERIALIZER_VALIDATION = y +# Set to 'y' if you would like to have support for regular expressions in +# the XSD/e runtime. If the regexp support is enabled, then the parser and +# serializer validation code will use it to validate the xs:pattern facet. +# If the regexp support is disabled, then this facet will be ignored. The +# regexp support increases the resulting executable size by about 30-50Kb. +# +XSDE_REGEXP = n + + # Base parser/serializer implementation reuse style. Valid values are: # # 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin) diff --git a/dist/etc/evc-4.0/config.nmake b/dist/etc/evc-4.0/config.nmake index 825c909..e5b9a56 100644 --- a/dist/etc/evc-4.0/config.nmake +++ b/dist/etc/evc-4.0/config.nmake @@ -110,6 +110,15 @@ XSDE_PARSER_VALIDATION = y XSDE_SERIALIZER_VALIDATION = y +# Set to 'y' if you would like to have support for regular expressions in +# the XSD/e runtime. If the regexp support is enabled, then the parser and +# serializer validation code will use it to validate the xs:pattern facet. +# If the regexp support is disabled, then this facet will be ignored. The +# regexp support increases the resulting executable size by about 30-50Kb. +# +XSDE_REGEXP = n + + # Base parser/serializer implementation reuse style. Valid values are: # # 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin) diff --git a/dist/etc/integrity/config.make b/dist/etc/integrity/config.make index 943b208..5755056 100644 --- a/dist/etc/integrity/config.make +++ b/dist/etc/integrity/config.make @@ -113,6 +113,15 @@ XSDE_PARSER_VALIDATION := y XSDE_SERIALIZER_VALIDATION := y +# Set to 'y' if you would like to have support for regular expressions in +# the XSD/e runtime. If the regexp support is enabled, then the parser and +# serializer validation code will use it to validate the xs:pattern facet. +# If the regexp support is disabled, then this facet will be ignored. The +# regexp support increases the resulting executable size by about 30-50Kb. +# +XSDE_REGEXP := n + + # Base parser/serializer implementation reuse style. Valid values are: # # 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin) diff --git a/dist/etc/iphone/config-device.make b/dist/etc/iphone/config-device.make index 8d11943..9b98c36 100644 --- a/dist/etc/iphone/config-device.make +++ b/dist/etc/iphone/config-device.make @@ -115,6 +115,15 @@ XSDE_PARSER_VALIDATION := y XSDE_SERIALIZER_VALIDATION := y +# Set to 'y' if you would like to have support for regular expressions in +# the XSD/e runtime. If the regexp support is enabled, then the parser and +# serializer validation code will use it to validate the xs:pattern facet. +# If the regexp support is disabled, then this facet will be ignored. The +# regexp support increases the resulting executable size by about 30-50Kb. +# +XSDE_REGEXP := n + + # Base parser/serializer implementation reuse style. Valid values are: # # 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin) diff --git a/dist/etc/iphone/config-simulator.make b/dist/etc/iphone/config-simulator.make index 48f0652..2fa5632 100644 --- a/dist/etc/iphone/config-simulator.make +++ b/dist/etc/iphone/config-simulator.make @@ -115,6 +115,15 @@ XSDE_PARSER_VALIDATION := y XSDE_SERIALIZER_VALIDATION := y +# Set to 'y' if you would like to have support for regular expressions in +# the XSD/e runtime. If the regexp support is enabled, then the parser and +# serializer validation code will use it to validate the xs:pattern facet. +# If the regexp support is disabled, then this facet will be ignored. The +# regexp support increases the resulting executable size by about 30-50Kb. +# +XSDE_REGEXP := n + + # Base parser/serializer implementation reuse style. Valid values are: # # 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin) diff --git a/dist/etc/lynxos/config-4.2.make b/dist/etc/lynxos/config-4.2.make index d1b1b68..1215121 100644 --- a/dist/etc/lynxos/config-4.2.make +++ b/dist/etc/lynxos/config-4.2.make @@ -114,6 +114,15 @@ XSDE_PARSER_VALIDATION := y XSDE_SERIALIZER_VALIDATION := y +# Set to 'y' if you would like to have support for regular expressions in +# the XSD/e runtime. If the regexp support is enabled, then the parser and +# serializer validation code will use it to validate the xs:pattern facet. +# If the regexp support is disabled, then this facet will be ignored. The +# regexp support increases the resulting executable size by about 30-50Kb. +# +XSDE_REGEXP := n + + # Base parser/serializer implementation reuse style. Valid values are: # # 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin) diff --git a/dist/etc/lynxos/config-5.0.make b/dist/etc/lynxos/config-5.0.make index b61ade3..d0a22bb 100644 --- a/dist/etc/lynxos/config-5.0.make +++ b/dist/etc/lynxos/config-5.0.make @@ -114,6 +114,15 @@ XSDE_PARSER_VALIDATION := y XSDE_SERIALIZER_VALIDATION := y +# Set to 'y' if you would like to have support for regular expressions in +# the XSD/e runtime. If the regexp support is enabled, then the parser and +# serializer validation code will use it to validate the xs:pattern facet. +# If the regexp support is disabled, then this facet will be ignored. The +# regexp support increases the resulting executable size by about 30-50Kb. +# +XSDE_REGEXP := n + + # Base parser/serializer implementation reuse style. Valid values are: # # 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin) diff --git a/dist/etc/qnx/config-6.3-gcc-2.95.make b/dist/etc/qnx/config-6.3-gcc-2.95.make index 1228db2..6a4a7e3 100644 --- a/dist/etc/qnx/config-6.3-gcc-2.95.make +++ b/dist/etc/qnx/config-6.3-gcc-2.95.make @@ -113,6 +113,15 @@ XSDE_PARSER_VALIDATION := y XSDE_SERIALIZER_VALIDATION := y +# Set to 'y' if you would like to have support for regular expressions in +# the XSD/e runtime. If the regexp support is enabled, then the parser and +# serializer validation code will use it to validate the xs:pattern facet. +# If the regexp support is disabled, then this facet will be ignored. The +# regexp support increases the resulting executable size by about 30-50Kb. +# +XSDE_REGEXP := n + + # Base parser/serializer implementation reuse style. Valid values are: # # 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin) diff --git a/dist/etc/qnx/config-6.3-gcc-3.3.make b/dist/etc/qnx/config-6.3-gcc-3.3.make index fe39d31..915fe6c 100644 --- a/dist/etc/qnx/config-6.3-gcc-3.3.make +++ b/dist/etc/qnx/config-6.3-gcc-3.3.make @@ -113,6 +113,15 @@ XSDE_PARSER_VALIDATION := y XSDE_SERIALIZER_VALIDATION := y +# Set to 'y' if you would like to have support for regular expressions in +# the XSD/e runtime. If the regexp support is enabled, then the parser and +# serializer validation code will use it to validate the xs:pattern facet. +# If the regexp support is disabled, then this facet will be ignored. The +# regexp support increases the resulting executable size by about 30-50Kb. +# +XSDE_REGEXP := n + + # Base parser/serializer implementation reuse style. Valid values are: # # 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin) diff --git a/dist/etc/qnx/config-6.4.make b/dist/etc/qnx/config-6.4.make index 33d3658..c8603cb 100644 --- a/dist/etc/qnx/config-6.4.make +++ b/dist/etc/qnx/config-6.4.make @@ -113,6 +113,15 @@ XSDE_PARSER_VALIDATION := y XSDE_SERIALIZER_VALIDATION := y +# Set to 'y' if you would like to have support for regular expressions in +# the XSD/e runtime. If the regexp support is enabled, then the parser and +# serializer validation code will use it to validate the xs:pattern facet. +# If the regexp support is disabled, then this facet will be ignored. The +# regexp support increases the resulting executable size by about 30-50Kb. +# +XSDE_REGEXP := n + + # Base parser/serializer implementation reuse style. Valid values are: # # 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin) diff --git a/dist/etc/vc-8.0/config-max.nmake b/dist/etc/vc-8.0/config-max.nmake index 07a7c0a..ce2b95e 100644 --- a/dist/etc/vc-8.0/config-max.nmake +++ b/dist/etc/vc-8.0/config-max.nmake @@ -111,6 +111,15 @@ XSDE_PARSER_VALIDATION = y XSDE_SERIALIZER_VALIDATION = y +# Set to 'y' if you would like to have support for regular expressions in +# the XSD/e runtime. If the regexp support is enabled, then the parser and +# serializer validation code will use it to validate the xs:pattern facet. +# If the regexp support is disabled, then this facet will be ignored. The +# regexp support increases the resulting executable size by about 30-50Kb. +# +XSDE_REGEXP = n + + # Base parser/serializer implementation reuse style. Valid values are: # # 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin) diff --git a/dist/etc/vc-8.0/config-min.nmake b/dist/etc/vc-8.0/config-min.nmake index 1177d58..a6f8e16 100644 --- a/dist/etc/vc-8.0/config-min.nmake +++ b/dist/etc/vc-8.0/config-min.nmake @@ -111,6 +111,15 @@ XSDE_PARSER_VALIDATION = y XSDE_SERIALIZER_VALIDATION = y +# Set to 'y' if you would like to have support for regular expressions in +# the XSD/e runtime. If the regexp support is enabled, then the parser and +# serializer validation code will use it to validate the xs:pattern facet. +# If the regexp support is disabled, then this facet will be ignored. The +# regexp support increases the resulting executable size by about 30-50Kb. +# +XSDE_REGEXP = n + + # Base parser/serializer implementation reuse style. Valid values are: # # 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin) diff --git a/dist/etc/vc-9.0/config-max.nmake b/dist/etc/vc-9.0/config-max.nmake index 6d39a73..8b6a71d 100644 --- a/dist/etc/vc-9.0/config-max.nmake +++ b/dist/etc/vc-9.0/config-max.nmake @@ -111,6 +111,15 @@ XSDE_PARSER_VALIDATION = y XSDE_SERIALIZER_VALIDATION = y +# Set to 'y' if you would like to have support for regular expressions in +# the XSD/e runtime. If the regexp support is enabled, then the parser and +# serializer validation code will use it to validate the xs:pattern facet. +# If the regexp support is disabled, then this facet will be ignored. The +# regexp support increases the resulting executable size by about 30-50Kb. +# +XSDE_REGEXP = n + + # Base parser/serializer implementation reuse style. Valid values are: # # 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin) diff --git a/dist/etc/vc-9.0/config-min.nmake b/dist/etc/vc-9.0/config-min.nmake index 337369f..f33f298 100644 --- a/dist/etc/vc-9.0/config-min.nmake +++ b/dist/etc/vc-9.0/config-min.nmake @@ -111,6 +111,15 @@ XSDE_PARSER_VALIDATION = y XSDE_SERIALIZER_VALIDATION = y +# Set to 'y' if you would like to have support for regular expressions in +# the XSD/e runtime. If the regexp support is enabled, then the parser and +# serializer validation code will use it to validate the xs:pattern facet. +# If the regexp support is disabled, then this facet will be ignored. The +# regexp support increases the resulting executable size by about 30-50Kb. +# +XSDE_REGEXP = n + + # Base parser/serializer implementation reuse style. Valid values are: # # 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin) diff --git a/dist/etc/vxworks/config-5.5.1.make b/dist/etc/vxworks/config-5.5.1.make index df7549e..c94dc3f 100644 --- a/dist/etc/vxworks/config-5.5.1.make +++ b/dist/etc/vxworks/config-5.5.1.make @@ -114,6 +114,15 @@ XSDE_PARSER_VALIDATION := y XSDE_SERIALIZER_VALIDATION := y +# Set to 'y' if you would like to have support for regular expressions in +# the XSD/e runtime. If the regexp support is enabled, then the parser and +# serializer validation code will use it to validate the xs:pattern facet. +# If the regexp support is disabled, then this facet will be ignored. The +# regexp support increases the resulting executable size by about 30-50Kb. +# +XSDE_REGEXP := n + + # Base parser/serializer implementation reuse style. Valid values are: # # 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin) diff --git a/dist/etc/vxworks/config-6.4-max.make b/dist/etc/vxworks/config-6.4-max.make index 835ba84..1811b38 100644 --- a/dist/etc/vxworks/config-6.4-max.make +++ b/dist/etc/vxworks/config-6.4-max.make @@ -126,6 +126,15 @@ XSDE_PARSER_VALIDATION := y XSDE_SERIALIZER_VALIDATION := y +# Set to 'y' if you would like to have support for regular expressions in +# the XSD/e runtime. If the regexp support is enabled, then the parser and +# serializer validation code will use it to validate the xs:pattern facet. +# If the regexp support is disabled, then this facet will be ignored. The +# regexp support increases the resulting executable size by about 30-50Kb. +# +XSDE_REGEXP := n + + # Base parser/serializer implementation reuse style. Valid values are: # # 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin) diff --git a/dist/etc/vxworks/config-6.4-min.make b/dist/etc/vxworks/config-6.4-min.make index 2ac5ef3..4116c10 100644 --- a/dist/etc/vxworks/config-6.4-min.make +++ b/dist/etc/vxworks/config-6.4-min.make @@ -113,6 +113,15 @@ XSDE_PARSER_VALIDATION := y XSDE_SERIALIZER_VALIDATION := y +# Set to 'y' if you would like to have support for regular expressions in +# the XSD/e runtime. If the regexp support is enabled, then the parser and +# serializer validation code will use it to validate the xs:pattern facet. +# If the regexp support is disabled, then this facet will be ignored. The +# regexp support increases the resulting executable size by about 30-50Kb. +# +XSDE_REGEXP := n + + # Base parser/serializer implementation reuse style. Valid values are: # # 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin) diff --git a/dist/etc/vxworks/config-6.7-max.make b/dist/etc/vxworks/config-6.7-max.make index 30e7347..77ad171 100644 --- a/dist/etc/vxworks/config-6.7-max.make +++ b/dist/etc/vxworks/config-6.7-max.make @@ -126,6 +126,15 @@ XSDE_PARSER_VALIDATION := y XSDE_SERIALIZER_VALIDATION := y +# Set to 'y' if you would like to have support for regular expressions in +# the XSD/e runtime. If the regexp support is enabled, then the parser and +# serializer validation code will use it to validate the xs:pattern facet. +# If the regexp support is disabled, then this facet will be ignored. The +# regexp support increases the resulting executable size by about 30-50Kb. +# +XSDE_REGEXP := n + + # Base parser/serializer implementation reuse style. Valid values are: # # 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin) diff --git a/dist/etc/vxworks/config-6.7-min.make b/dist/etc/vxworks/config-6.7-min.make index b624c8c..babc9f3 100644 --- a/dist/etc/vxworks/config-6.7-min.make +++ b/dist/etc/vxworks/config-6.7-min.make @@ -113,6 +113,15 @@ XSDE_PARSER_VALIDATION := y XSDE_SERIALIZER_VALIDATION := y +# Set to 'y' if you would like to have support for regular expressions in +# the XSD/e runtime. If the regexp support is enabled, then the parser and +# serializer validation code will use it to validate the xs:pattern facet. +# If the regexp support is disabled, then this facet will be ignored. The +# regexp support increases the resulting executable size by about 30-50Kb. +# +XSDE_REGEXP := n + + # Base parser/serializer implementation reuse style. Valid values are: # # 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin) diff --git a/dist/libxsde/xsde/makefile b/dist/libxsde/xsde/makefile index a134d29..1e3db26 100644 --- a/dist/libxsde/xsde/makefile +++ b/dist/libxsde/xsde/makefile @@ -9,6 +9,12 @@ EXTRA_CPPFLAGS := -I.. src := c/expat/xmlparse.c c/expat/xmlrole.c c/expat/xmltok.c src += c/genx/genx.c c/genx/char-props.c +ifneq ($(XSDE_PARSER_VALIDATION)$(XSDE_SERIALIZER_VALIDATION),nn) +ifeq ($(XSDE_REGEXP),y) +src += c/regexp/chvalid.c c/regexp/xmlunicode.c c/regexp/xmlregexp.c +endif +endif + ifeq ($(XSDE_CUSTOM_ALLOCATOR),y) ifeq ($(XSDE_DEFAULT_ALLOCATOR),y) src += allocator.c @@ -563,6 +569,9 @@ endif ifeq ($(XSDE_SERIALIZER_VALIDATION),y) @echo $(h)define XSDE_SERIALIZER_VALIDATION >>$@ endif +ifeq ($(XSDE_REGEXP),y) + @echo $(h)define XSDE_REGEXP >>$@ +endif ifeq ($(XSDE_REUSE_STYLE),mixin) @echo $(h)define XSDE_REUSE_STYLE_MIXIN >>$@ else diff --git a/dist/libxsde/xsde/nmakefile b/dist/libxsde/xsde/nmakefile index 9eb9aea..da40ece 100644 --- a/dist/libxsde/xsde/nmakefile +++ b/dist/libxsde/xsde/nmakefile @@ -11,6 +11,12 @@ EXTRA_CPPFLAGS = /I.. src = c\expat\xmlparse.c c\expat\xmlrole.c c\expat\xmltok.c src = $(src) c\genx\genx.c c\genx\char-props.c +!if "$(XSDE_PARSER_VALIDATION)$(XSDE_SERIALIZER_VALIDATION)" != "nn" +!if "$(XSDE_REGEXP)" == "n" +src = $(src) c\regexp\chvalid.c c\regexp\xmlunicode.c c\regexp\xmlregexp.c +!endif +!endif + !if "$(XSDE_CUSTOM_ALLOCATOR)" == "y" !if "$(XSDE_DEFAULT_ALLOCATOR)" == "y" src = $(src) allocator.c @@ -562,7 +568,9 @@ config.h: !if "$(XSDE_SERIALIZER_VALIDATION)" == "y" @echo #define XSDE_SERIALIZER_VALIDATION >>$@ !endif - +!if "$(XSDE_REGEXP)" == "y" + @echo #define REGEXP >>$@ +!endif !if "$(XSDE_REUSE_STYLE)" == "mixin" @echo #define XSDE_REUSE_STYLE_MIXIN >>$@ !else @@ -619,6 +627,7 @@ cleanobj: -del xsde.lib config.h -del c\expat\*.obj -del c\genx\*.obj + -del c\regexp\*.obj -del cxx\*.obj -del cxx\parser\*.obj -del cxx\parser\expat\*.obj diff --git a/dist/tests/cxx/hybrid/makefile b/dist/tests/cxx/hybrid/makefile index 62bbc1e..9b5fea2 100644 --- a/dist/tests/cxx/hybrid/makefile +++ b/dist/tests/cxx/hybrid/makefile @@ -15,7 +15,7 @@ endif endif ifeq ($(XSDE_IOSTREAM),y) -dirs += built-in default enumeration list test-template union +dirs += built-in default enumeration list test-template union pattern ifeq ($(XSDE_ENCODING),iso8859-1) dirs += iso8859-1 diff --git a/dist/tests/cxx/hybrid/nmakefile b/dist/tests/cxx/hybrid/nmakefile index 61097dd..560f0b1 100644 --- a/dist/tests/cxx/hybrid/nmakefile +++ b/dist/tests/cxx/hybrid/nmakefile @@ -15,7 +15,7 @@ dirs = $(dirs) iterator !endif !if "$(XSDE_IOSTREAM)" == "y" -dirs = $(dirs) built-in default enumeration list test-template union +dirs = $(dirs) built-in default enumeration list test-template union pattern !if "$(XSDE_ENCODING)" == "iso8859-1" dirs = $(dirs) iso8859-1 diff --git a/libxsde/xsde/c/regexp/COPYING b/libxsde/xsde/c/regexp/COPYING new file mode 100644 index 0000000..9cedbe2 --- /dev/null +++ b/libxsde/xsde/c/regexp/COPYING @@ -0,0 +1,40 @@ +Copyright (c) 2009-2010 Code Synthesis Tools CC. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License version 2 as +published by the Free Software Foundation. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + +Original Version: + +Copyright (C) 1998-2003 Daniel Veillard. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is fur- +nished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FIT- +NESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CON- +NECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +Except as contained in this notice, the name of Daniel Veillard shall not +be used in advertising or otherwise to promote the sale, use or other deal- +ings in this Software without prior written authorization from him. diff --git a/libxsde/xsde/c/regexp/chvalid.c b/libxsde/xsde/c/regexp/chvalid.c new file mode 100644 index 0000000..ad248ad --- /dev/null +++ b/libxsde/xsde/c/regexp/chvalid.c @@ -0,0 +1,388 @@ +/* + * chvalid.c: this module implements the character range + * validation APIs + * + * This file is automatically generated from the cvs source + * definition files using the genChRanges.py Python script + * + * Generation date: Mon Mar 27 11:09:48 2006 + * Sources: chvalid.def + */ +#include + +#include +#include "chvalid.h" + +/* + * The initial tables ({func_name}_tab) are used to validate whether a + * single-byte character is within the specified group. Each table + * contains 256 bytes, with each byte representing one of the 256 + * possible characters. If the table byte is set, the character is + * allowed. + * + */ +static const unsigned char xmlIsPubidChar_tab[256] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x01, + 0x01, 0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 }; + +static const xmlChSRange xmlIsBaseChar_srng[] = { {0x100, 0x131}, + {0x134, 0x13e}, {0x141, 0x148}, {0x14a, 0x17e}, {0x180, 0x1c3}, + {0x1cd, 0x1f0}, {0x1f4, 0x1f5}, {0x1fa, 0x217}, {0x250, 0x2a8}, + {0x2bb, 0x2c1}, {0x386, 0x386}, {0x388, 0x38a}, {0x38c, 0x38c}, + {0x38e, 0x3a1}, {0x3a3, 0x3ce}, {0x3d0, 0x3d6}, {0x3da, 0x3da}, + {0x3dc, 0x3dc}, {0x3de, 0x3de}, {0x3e0, 0x3e0}, {0x3e2, 0x3f3}, + {0x401, 0x40c}, {0x40e, 0x44f}, {0x451, 0x45c}, {0x45e, 0x481}, + {0x490, 0x4c4}, {0x4c7, 0x4c8}, {0x4cb, 0x4cc}, {0x4d0, 0x4eb}, + {0x4ee, 0x4f5}, {0x4f8, 0x4f9}, {0x531, 0x556}, {0x559, 0x559}, + {0x561, 0x586}, {0x5d0, 0x5ea}, {0x5f0, 0x5f2}, {0x621, 0x63a}, + {0x641, 0x64a}, {0x671, 0x6b7}, {0x6ba, 0x6be}, {0x6c0, 0x6ce}, + {0x6d0, 0x6d3}, {0x6d5, 0x6d5}, {0x6e5, 0x6e6}, {0x905, 0x939}, + {0x93d, 0x93d}, {0x958, 0x961}, {0x985, 0x98c}, {0x98f, 0x990}, + {0x993, 0x9a8}, {0x9aa, 0x9b0}, {0x9b2, 0x9b2}, {0x9b6, 0x9b9}, + {0x9dc, 0x9dd}, {0x9df, 0x9e1}, {0x9f0, 0x9f1}, {0xa05, 0xa0a}, + {0xa0f, 0xa10}, {0xa13, 0xa28}, {0xa2a, 0xa30}, {0xa32, 0xa33}, + {0xa35, 0xa36}, {0xa38, 0xa39}, {0xa59, 0xa5c}, {0xa5e, 0xa5e}, + {0xa72, 0xa74}, {0xa85, 0xa8b}, {0xa8d, 0xa8d}, {0xa8f, 0xa91}, + {0xa93, 0xaa8}, {0xaaa, 0xab0}, {0xab2, 0xab3}, {0xab5, 0xab9}, + {0xabd, 0xabd}, {0xae0, 0xae0}, {0xb05, 0xb0c}, {0xb0f, 0xb10}, + {0xb13, 0xb28}, {0xb2a, 0xb30}, {0xb32, 0xb33}, {0xb36, 0xb39}, + {0xb3d, 0xb3d}, {0xb5c, 0xb5d}, {0xb5f, 0xb61}, {0xb85, 0xb8a}, + {0xb8e, 0xb90}, {0xb92, 0xb95}, {0xb99, 0xb9a}, {0xb9c, 0xb9c}, + {0xb9e, 0xb9f}, {0xba3, 0xba4}, {0xba8, 0xbaa}, {0xbae, 0xbb5}, + {0xbb7, 0xbb9}, {0xc05, 0xc0c}, {0xc0e, 0xc10}, {0xc12, 0xc28}, + {0xc2a, 0xc33}, {0xc35, 0xc39}, {0xc60, 0xc61}, {0xc85, 0xc8c}, + {0xc8e, 0xc90}, {0xc92, 0xca8}, {0xcaa, 0xcb3}, {0xcb5, 0xcb9}, + {0xcde, 0xcde}, {0xce0, 0xce1}, {0xd05, 0xd0c}, {0xd0e, 0xd10}, + {0xd12, 0xd28}, {0xd2a, 0xd39}, {0xd60, 0xd61}, {0xe01, 0xe2e}, + {0xe30, 0xe30}, {0xe32, 0xe33}, {0xe40, 0xe45}, {0xe81, 0xe82}, + {0xe84, 0xe84}, {0xe87, 0xe88}, {0xe8a, 0xe8a}, {0xe8d, 0xe8d}, + {0xe94, 0xe97}, {0xe99, 0xe9f}, {0xea1, 0xea3}, {0xea5, 0xea5}, + {0xea7, 0xea7}, {0xeaa, 0xeab}, {0xead, 0xeae}, {0xeb0, 0xeb0}, + {0xeb2, 0xeb3}, {0xebd, 0xebd}, {0xec0, 0xec4}, {0xf40, 0xf47}, + {0xf49, 0xf69}, {0x10a0, 0x10c5}, {0x10d0, 0x10f6}, {0x1100, 0x1100}, + {0x1102, 0x1103}, {0x1105, 0x1107}, {0x1109, 0x1109}, {0x110b, 0x110c}, + {0x110e, 0x1112}, {0x113c, 0x113c}, {0x113e, 0x113e}, {0x1140, 0x1140}, + {0x114c, 0x114c}, {0x114e, 0x114e}, {0x1150, 0x1150}, {0x1154, 0x1155}, + {0x1159, 0x1159}, {0x115f, 0x1161}, {0x1163, 0x1163}, {0x1165, 0x1165}, + {0x1167, 0x1167}, {0x1169, 0x1169}, {0x116d, 0x116e}, {0x1172, 0x1173}, + {0x1175, 0x1175}, {0x119e, 0x119e}, {0x11a8, 0x11a8}, {0x11ab, 0x11ab}, + {0x11ae, 0x11af}, {0x11b7, 0x11b8}, {0x11ba, 0x11ba}, {0x11bc, 0x11c2}, + {0x11eb, 0x11eb}, {0x11f0, 0x11f0}, {0x11f9, 0x11f9}, {0x1e00, 0x1e9b}, + {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45}, + {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, {0x1f59, 0x1f59}, {0x1f5b, 0x1f5b}, + {0x1f5d, 0x1f5d}, {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, {0x1fb6, 0x1fbc}, + {0x1fbe, 0x1fbe}, {0x1fc2, 0x1fc4}, {0x1fc6, 0x1fcc}, {0x1fd0, 0x1fd3}, + {0x1fd6, 0x1fdb}, {0x1fe0, 0x1fec}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffc}, + {0x2126, 0x2126}, {0x212a, 0x212b}, {0x212e, 0x212e}, {0x2180, 0x2182}, + {0x3041, 0x3094}, {0x30a1, 0x30fa}, {0x3105, 0x312c}, {0xac00, 0xd7a3}}; + +static const xmlChRangeGroup xmlIsBaseCharGroup = + {197, 0, xmlIsBaseChar_srng, (xmlChLRangePtr)0}; + +static const xmlChSRange xmlIsChar_srng[] = { {0x100, 0xd7ff}, + {0xe000, 0xfffd}}; +static const xmlChLRange xmlIsChar_lrng[] = { {0x10000, 0x10ffff}}; + +static const xmlChRangeGroup xmlIsCharGroup = + {2, 1, xmlIsChar_srng, xmlIsChar_lrng}; + +static const xmlChSRange xmlIsCombining_srng[] = { {0x300, 0x345}, + {0x360, 0x361}, {0x483, 0x486}, {0x591, 0x5a1}, {0x5a3, 0x5b9}, + {0x5bb, 0x5bd}, {0x5bf, 0x5bf}, {0x5c1, 0x5c2}, {0x5c4, 0x5c4}, + {0x64b, 0x652}, {0x670, 0x670}, {0x6d6, 0x6dc}, {0x6dd, 0x6df}, + {0x6e0, 0x6e4}, {0x6e7, 0x6e8}, {0x6ea, 0x6ed}, {0x901, 0x903}, + {0x93c, 0x93c}, {0x93e, 0x94c}, {0x94d, 0x94d}, {0x951, 0x954}, + {0x962, 0x963}, {0x981, 0x983}, {0x9bc, 0x9bc}, {0x9be, 0x9be}, + {0x9bf, 0x9bf}, {0x9c0, 0x9c4}, {0x9c7, 0x9c8}, {0x9cb, 0x9cd}, + {0x9d7, 0x9d7}, {0x9e2, 0x9e3}, {0xa02, 0xa02}, {0xa3c, 0xa3c}, + {0xa3e, 0xa3e}, {0xa3f, 0xa3f}, {0xa40, 0xa42}, {0xa47, 0xa48}, + {0xa4b, 0xa4d}, {0xa70, 0xa71}, {0xa81, 0xa83}, {0xabc, 0xabc}, + {0xabe, 0xac5}, {0xac7, 0xac9}, {0xacb, 0xacd}, {0xb01, 0xb03}, + {0xb3c, 0xb3c}, {0xb3e, 0xb43}, {0xb47, 0xb48}, {0xb4b, 0xb4d}, + {0xb56, 0xb57}, {0xb82, 0xb83}, {0xbbe, 0xbc2}, {0xbc6, 0xbc8}, + {0xbca, 0xbcd}, {0xbd7, 0xbd7}, {0xc01, 0xc03}, {0xc3e, 0xc44}, + {0xc46, 0xc48}, {0xc4a, 0xc4d}, {0xc55, 0xc56}, {0xc82, 0xc83}, + {0xcbe, 0xcc4}, {0xcc6, 0xcc8}, {0xcca, 0xccd}, {0xcd5, 0xcd6}, + {0xd02, 0xd03}, {0xd3e, 0xd43}, {0xd46, 0xd48}, {0xd4a, 0xd4d}, + {0xd57, 0xd57}, {0xe31, 0xe31}, {0xe34, 0xe3a}, {0xe47, 0xe4e}, + {0xeb1, 0xeb1}, {0xeb4, 0xeb9}, {0xebb, 0xebc}, {0xec8, 0xecd}, + {0xf18, 0xf19}, {0xf35, 0xf35}, {0xf37, 0xf37}, {0xf39, 0xf39}, + {0xf3e, 0xf3e}, {0xf3f, 0xf3f}, {0xf71, 0xf84}, {0xf86, 0xf8b}, + {0xf90, 0xf95}, {0xf97, 0xf97}, {0xf99, 0xfad}, {0xfb1, 0xfb7}, + {0xfb9, 0xfb9}, {0x20d0, 0x20dc}, {0x20e1, 0x20e1}, {0x302a, 0x302f}, + {0x3099, 0x3099}, {0x309a, 0x309a}}; + +static const xmlChRangeGroup xmlIsCombiningGroup = + {95, 0, xmlIsCombining_srng, (xmlChLRangePtr)0}; + +static const xmlChSRange xmlIsDigit_srng[] = { {0x660, 0x669}, + {0x6f0, 0x6f9}, {0x966, 0x96f}, {0x9e6, 0x9ef}, {0xa66, 0xa6f}, + {0xae6, 0xaef}, {0xb66, 0xb6f}, {0xbe7, 0xbef}, {0xc66, 0xc6f}, + {0xce6, 0xcef}, {0xd66, 0xd6f}, {0xe50, 0xe59}, {0xed0, 0xed9}, + {0xf20, 0xf29}}; + +static const xmlChRangeGroup xmlIsDigitGroup = + {14, 0, xmlIsDigit_srng, (xmlChLRangePtr)0}; + +static const xmlChSRange xmlIsExtender_srng[] = { {0x2d0, 0x2d0}, + {0x2d1, 0x2d1}, {0x387, 0x387}, {0x640, 0x640}, {0xe46, 0xe46}, + {0xec6, 0xec6}, {0x3005, 0x3005}, {0x3031, 0x3035}, {0x309d, 0x309e}, + {0x30fc, 0x30fe}}; + +static const xmlChRangeGroup xmlIsExtenderGroup = + {10, 0, xmlIsExtender_srng, (xmlChLRangePtr)0}; + +static const xmlChSRange xmlIsIdeographic_srng[] = { {0x3007, 0x3007}, + {0x3021, 0x3029}, {0x4e00, 0x9fa5}}; + +static const xmlChRangeGroup xmlIsIdeographicGroup = + {3, 0, xmlIsIdeographic_srng, (xmlChLRangePtr)0}; + +/** + * xmlCharInRange: + * @val: character to be validated + * @rptr: pointer to range to be used to validate + * + * Does a binary search of the range table to determine if char + * is valid + * + * Returns: true if character valid, false otherwise + */ +int +xmlCharInRange (unsigned int val, const xmlChRangeGroup *rptr) { + int low, high, mid; + const xmlChSRange *sptr; + const xmlChLRange *lptr; + + if (rptr == NULL) return(0); + if (val < 0x10000) { /* is val in 'short' or 'long' array? */ + if (rptr->nbShortRange == 0) + return 0; + low = 0; + high = rptr->nbShortRange - 1; + sptr = rptr->shortRange; + while (low <= high) { + mid = (low + high) / 2; + if ((unsigned short) val < sptr[mid].low) { + high = mid - 1; + } else { + if ((unsigned short) val > sptr[mid].high) { + low = mid + 1; + } else { + return 1; + } + } + } + } else { + if (rptr->nbLongRange == 0) { + return 0; + } + low = 0; + high = rptr->nbLongRange - 1; + lptr = rptr->longRange; + while (low <= high) { + mid = (low + high) / 2; + if (val < lptr[mid].low) { + high = mid - 1; + } else { + if (val > lptr[mid].high) { + low = mid + 1; + } else { + return 1; + } + } + } + } + return 0; +} + +/** + * xmlIsBaseChar_ch: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsBaseChar_ch(c) (((0x41 <= (c)) && ((c) <= 0x5a)) || \ + ((0x61 <= (c)) && ((c) <= 0x7a)) || \ + ((0xc0 <= (c)) && ((c) <= 0xd6)) || \ + ((0xd8 <= (c)) && ((c) <= 0xf6)) || \ + (0xf8 <= (c))) + +/** + * xmlIsBaseCharQ: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsBaseCharQ(c) (((c) < 0x100) ? \ + xmlIsBaseChar_ch((c)) : \ + xmlCharInRange((c), &xmlIsBaseCharGroup)) + +/** + * xmlIsBlank_ch: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsBlank_ch(c) (((c) == 0x20) || \ + ((0x9 <= (c)) && ((c) <= 0xa)) || \ + ((c) == 0xd)) + +/** + * xmlIsBlankQ: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsBlankQ(c) (((c) < 0x100) ? \ + xmlIsBlank_ch((c)) : 0) + + +/** + * xmlIsChar_ch: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsChar_ch(c) (((0x9 <= (c)) && ((c) <= 0xa)) || \ + ((c) == 0xd) || \ + (0x20 <= (c))) + +/** + * xmlIsCharQ: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsCharQ(c) (((c) < 0x100) ? \ + xmlIsChar_ch((c)) :\ + (((0x100 <= (c)) && ((c) <= 0xd7ff)) || \ + ((0xe000 <= (c)) && ((c) <= 0xfffd)) || \ + ((0x10000 <= (c)) && ((c) <= 0x10ffff)))) + +/** + * xmlIsCombiningQ: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsCombiningQ(c) (((c) < 0x100) ? \ + 0 : \ + xmlCharInRange((c), &xmlIsCombiningGroup)) + +/** + * xmlIsDigit_ch: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsDigit_ch(c) (((0x30 <= (c)) && ((c) <= 0x39))) + +/** + * xmlIsDigitQ: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsDigitQ(c) (((c) < 0x100) ? \ + xmlIsDigit_ch((c)) : \ + xmlCharInRange((c), &xmlIsDigitGroup)) + +/** + * xmlIsExtender_ch: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsExtender_ch(c) (((c) == 0xb7)) + +/** + * xmlIsExtenderQ: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsExtenderQ(c) (((c) < 0x100) ? \ + xmlIsExtender_ch((c)) : \ + xmlCharInRange((c), &xmlIsExtenderGroup)) + +/** + * xmlIsIdeographicQ: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsIdeographicQ(c) (((c) < 0x100) ? \ + 0 :\ + (((0x4e00 <= (c)) && ((c) <= 0x9fa5)) || \ + ((c) == 0x3007) || \ + ((0x3021 <= (c)) && ((c) <= 0x3029)))) + +/** + * xmlIsPubidChar_ch: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsPubidChar_ch(c) (xmlIsPubidChar_tab[(c)]) + +/** + * xmlIsPubidCharQ: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsPubidCharQ(c) (((c) < 0x100) ? \ + xmlIsPubidChar_ch((c)) : 0) + +int +xmlIsDigit (int c) +{ + return xmlIsDigitQ (c); +} + +int +xmlIsLetter (int c) +{ + return xmlIsBaseCharQ (c) || xmlIsIdeographicQ (c); +} + +int +xmlIsCombining (int c) +{ + return xmlIsCombiningQ (c); +} + +int +xmlIsExtender (int c) +{ + return xmlIsExtenderQ (c); +} + +int +xmlIsChar (int c) +{ + return xmlIsCharQ (c); +} + +#include diff --git a/libxsde/xsde/c/regexp/chvalid.h b/libxsde/xsde/c/regexp/chvalid.h new file mode 100644 index 0000000..9865350 --- /dev/null +++ b/libxsde/xsde/c/regexp/chvalid.h @@ -0,0 +1,63 @@ +/* + * Summary: Unicode character range checking + * Description: this module exports interfaces for the character + * range validation APIs + * + * This file is automatically generated from the cvs source + * definition files using the genChRanges.py Python script + * + * Generation date: Mon Mar 27 11:09:48 2006 + * Sources: chvalid.def + */ + +#ifndef __XML_CHVALID_H__ +#define __XML_CHVALID_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Define our typedefs and structures + * + */ +typedef struct _xmlChSRange xmlChSRange; +typedef xmlChSRange *xmlChSRangePtr; +struct _xmlChSRange { + unsigned short low; + unsigned short high; +}; + +typedef struct _xmlChLRange xmlChLRange; +typedef xmlChLRange *xmlChLRangePtr; +struct _xmlChLRange { + unsigned int low; + unsigned int high; +}; + +typedef struct _xmlChRangeGroup xmlChRangeGroup; +typedef xmlChRangeGroup *xmlChRangeGroupPtr; +struct _xmlChRangeGroup { + int nbShortRange; + int nbLongRange; + const xmlChSRange *shortRange; /* points to an array of ranges */ + const xmlChLRange *longRange; +}; + + + +/** + * Range checking routine + */ +int xmlCharInRange(unsigned int val, const xmlChRangeGroup *group); + +int xmlIsDigit (int c); +int xmlIsLetter (int c); +int xmlIsCombining (int c); +int xmlIsExtender (int c); +int xmlIsChar (int c); + +#ifdef __cplusplus +} +#endif +#endif /* __XML_CHVALID_H__ */ diff --git a/libxsde/xsde/c/regexp/xmlregexp.c b/libxsde/xsde/c/regexp/xmlregexp.c new file mode 100644 index 0000000..ad493ce --- /dev/null +++ b/libxsde/xsde/c/regexp/xmlregexp.c @@ -0,0 +1,4375 @@ +/* + * regexp.c: generic and extensible Regular Expression engine + * + * Basically designed with the purpose of compiling regexps for + * the variety of validation/shemas mechanisms now available in + * XML related specifications these include: + * - XML-1.0 DTD validation + * - XML Schemas structure part 1 + * - XML Schemas Datatypes part 2 especially Appendix F + * - RELAX-NG/TREX i.e. the counter proposal + * + * See COPYING for the status of this software. + */ +#include + +#include +#include + +#include + +#ifdef XSDE_CUSTOM_ALLOCATOR +# include +#endif + +#include "xmlregexp.h" +#include "xmlunicode.h" +#include "chvalid.h" + +#ifndef INT_MAX +#define INT_MAX 123456789 /* easy to flag and big enough for our needs */ +#endif + +#define MAX_PUSH 10000000 + +typedef enum +{ + XML_REGEXP_ERROR_NONE, + XML_REGEXP_ERROR_MEMORY, + XML_REGEXP_ERROR_COMPILE +} xmlRegexpError; + +#define REGEXP_ERROR(str) xmlRegexpErrCompile(ctxt, str); + +#ifdef XSDE_CUSTOM_ALLOCATOR +#define REGEXP_MALLOC(n) xsde_alloc(n) +#define REGEXP_REALLOC(p, n) xsde_realloc(p, n) +#define REGEXP_FREE(p) xsde_free(p) +#else +#define REGEXP_MALLOC(n) malloc(n) +#define REGEXP_REALLOC(p, n) realloc(p, n) +#define REGEXP_FREE(p) free(p) +#endif + +#define NEXT ctxt->cur++ +#define CUR (*(ctxt->cur)) +#define NXT(index) (ctxt->cur[index]) + +#define CUR_SCHAR(s, l) xmlStringCurrentChar(s, &l) +#define NEXTL(l) ctxt->cur += l; +#define XML_REG_STRING_SEPARATOR '|' + +/* + * Need PREV to check on a '-' within a Character Group. May only be used + * when it's guaranteed that cur is not at the beginning of ctxt->string! + */ +#define PREV (ctxt->cur[-1]) + + +/************************************************************************ + * * + * Datatypes and structures * + * * + ************************************************************************/ + +/* + * Note: the order of the enums below is significant, do not shuffle + */ +typedef enum { + XML_REGEXP_EPSILON = 1, + XML_REGEXP_CHARVAL, + XML_REGEXP_RANGES, + XML_REGEXP_SUBREG, /* used for () sub regexps */ + XML_REGEXP_STRING, + XML_REGEXP_ANYCHAR, /* . */ + XML_REGEXP_ANYSPACE, /* \s */ + XML_REGEXP_NOTSPACE, /* \S */ + XML_REGEXP_INITNAME, /* \l */ + XML_REGEXP_NOTINITNAME, /* \L */ + XML_REGEXP_NAMECHAR, /* \c */ + XML_REGEXP_NOTNAMECHAR, /* \C */ + XML_REGEXP_DECIMAL, /* \d */ + XML_REGEXP_NOTDECIMAL, /* \D */ + XML_REGEXP_REALCHAR, /* \w */ + XML_REGEXP_NOTREALCHAR, /* \W */ + XML_REGEXP_LETTER = 100, + XML_REGEXP_LETTER_UPPERCASE, + XML_REGEXP_LETTER_LOWERCASE, + XML_REGEXP_LETTER_TITLECASE, + XML_REGEXP_LETTER_MODIFIER, + XML_REGEXP_LETTER_OTHERS, + XML_REGEXP_MARK, + XML_REGEXP_MARK_NONSPACING, + XML_REGEXP_MARK_SPACECOMBINING, + XML_REGEXP_MARK_ENCLOSING, + XML_REGEXP_NUMBER, + XML_REGEXP_NUMBER_DECIMAL, + XML_REGEXP_NUMBER_LETTER, + XML_REGEXP_NUMBER_OTHERS, + XML_REGEXP_PUNCT, + XML_REGEXP_PUNCT_CONNECTOR, + XML_REGEXP_PUNCT_DASH, + XML_REGEXP_PUNCT_OPEN, + XML_REGEXP_PUNCT_CLOSE, + XML_REGEXP_PUNCT_INITQUOTE, + XML_REGEXP_PUNCT_FINQUOTE, + XML_REGEXP_PUNCT_OTHERS, + XML_REGEXP_SEPAR, + XML_REGEXP_SEPAR_SPACE, + XML_REGEXP_SEPAR_LINE, + XML_REGEXP_SEPAR_PARA, + XML_REGEXP_SYMBOL, + XML_REGEXP_SYMBOL_MATH, + XML_REGEXP_SYMBOL_CURRENCY, + XML_REGEXP_SYMBOL_MODIFIER, + XML_REGEXP_SYMBOL_OTHERS, + XML_REGEXP_OTHER, + XML_REGEXP_OTHER_CONTROL, + XML_REGEXP_OTHER_FORMAT, + XML_REGEXP_OTHER_PRIVATE, + XML_REGEXP_OTHER_NA, + XML_REGEXP_BLOCK_NAME +} xmlRegAtomType; + +typedef enum { + XML_REGEXP_QUANT_EPSILON = 1, + XML_REGEXP_QUANT_ONCE, + XML_REGEXP_QUANT_OPT, + XML_REGEXP_QUANT_MULT, + XML_REGEXP_QUANT_PLUS, + XML_REGEXP_QUANT_ONCEONLY, + XML_REGEXP_QUANT_ALL, + XML_REGEXP_QUANT_RANGE +} xmlRegQuantType; + +typedef enum { + XML_REGEXP_START_STATE = 1, + XML_REGEXP_FINAL_STATE, + XML_REGEXP_TRANS_STATE, + XML_REGEXP_SINK_STATE, + XML_REGEXP_UNREACH_STATE +} xmlRegStateType; + +typedef enum { + XML_REGEXP_MARK_NORMAL = 0, + XML_REGEXP_MARK_START, + XML_REGEXP_MARK_VISITED +} xmlRegMarkedType; + +typedef struct _xmlRegRange xmlRegRange; +typedef xmlRegRange *xmlRegRangePtr; + +struct _xmlRegRange { + int neg; /* 0 normal, 1 not, 2 exclude */ + xmlRegAtomType type; + int start; + int end; + xmlChar *blockName; +}; + +typedef struct _xmlRegAtom xmlRegAtom; +typedef xmlRegAtom *xmlRegAtomPtr; + +typedef struct _xmlAutomataState xmlRegState; +typedef xmlRegState *xmlRegStatePtr; + +struct _xmlRegAtom { + int no; + xmlRegAtomType type; + xmlRegQuantType quant; + int min; + int max; + + void *valuep; + void *valuep2; + int neg; + int codepoint; + xmlRegStatePtr start; + xmlRegStatePtr start0; + xmlRegStatePtr stop; + int maxRanges; + int nbRanges; + xmlRegRangePtr *ranges; + void *data; +}; + +typedef struct _xmlRegCounter xmlRegCounter; +typedef xmlRegCounter *xmlRegCounterPtr; + +struct _xmlRegCounter { + int min; + int max; +}; + +typedef struct _xmlRegTrans xmlRegTrans; +typedef xmlRegTrans *xmlRegTransPtr; + +struct _xmlRegTrans { + xmlRegAtomPtr atom; + int to; + int counter; + int count; + int nd; +}; + +typedef struct _xmlAutomataState xmlAutomataState; +typedef xmlAutomataState *xmlAutomataStatePtr; + +struct _xmlAutomataState { + xmlRegStateType type; + xmlRegMarkedType mark; + xmlRegMarkedType reached; + int no; + int maxTrans; + int nbTrans; + xmlRegTrans *trans; + /* knowing states ponting to us can speed things up */ + int maxTransTo; + int nbTransTo; + int *transTo; +}; + +typedef struct _xmlAutomata xmlRegParserCtxt; +typedef xmlRegParserCtxt *xmlRegParserCtxtPtr; + +#define AM_AUTOMATA_RNG 1 + +typedef struct _xmlAutomata xmlAutomata; +typedef xmlAutomata *xmlAutomataPtr; + +struct _xmlAutomata { + xmlChar *string; + xmlChar *cur; + + int error; + const char *error_str; + int neg; + + xmlRegStatePtr start; + xmlRegStatePtr end; + xmlRegStatePtr state; + + xmlRegAtomPtr atom; + + int maxAtoms; + int nbAtoms; + xmlRegAtomPtr *atoms; + + int maxStates; + int nbStates; + xmlRegStatePtr *states; + + int maxCounters; + int nbCounters; + xmlRegCounter *counters; + + int determinist; + int negs; + int flags; +}; + +struct _xmlRegexp { + xmlChar *string; + int nbStates; + xmlRegStatePtr *states; + int nbAtoms; + xmlRegAtomPtr *atoms; + int nbCounters; + xmlRegCounter *counters; + int determinist; + int flags; + /* + * That's the compact form for determinists automatas + */ + int nbstates; + int *compact; + void **transdata; + int nbstrings; + xmlChar **stringMap; +}; + +typedef struct _xmlRegExecRollback xmlRegExecRollback; +typedef xmlRegExecRollback *xmlRegExecRollbackPtr; + +struct _xmlRegExecRollback { + xmlRegStatePtr state;/* the current state */ + int index; /* the index in the input stack */ + int nextbranch; /* the next transition to explore in that state */ + int *counts; /* save the automata state if it has some */ +}; + +typedef struct _xmlRegInputToken xmlRegInputToken; +typedef xmlRegInputToken *xmlRegInputTokenPtr; + +struct _xmlRegInputToken { + xmlChar *value; + void *data; +}; + +typedef struct _xmlRegExecCtxt xmlRegExecCtxt; +typedef xmlRegExecCtxt *xmlRegExecCtxtPtr; + +typedef void (*xmlRegExecCallbacks) (xmlRegExecCtxtPtr exec, + const xmlChar *token, + void *transdata, + void *inputdata); + +struct _xmlRegExecCtxt { + int status; /* execution status != 0 indicate an error */ + int determinist; /* did we find an indeterministic behaviour */ + xmlRegexpPtr comp; /* the compiled regexp */ + xmlRegExecCallbacks callback; + void *data; + + xmlRegStatePtr state;/* the current state */ + int transno; /* the current transition on that state */ + int transcount; /* the number of chars in char counted transitions */ + + /* + * A stack of rollback states + */ + int maxRollbacks; + int nbRollbacks; + xmlRegExecRollback *rollbacks; + + /* + * The state of the automata if any + */ + int *counts; + + /* + * The input stack + */ + int inputStackMax; + int inputStackNr; + int index; + int *charStack; + const xmlChar *inputString; /* when operating on characters */ + xmlRegInputTokenPtr inputStack;/* when operating on strings */ + + /* + * error handling + */ + int errStateNo; /* the error state number */ + xmlRegStatePtr errState; /* the error state */ + xmlChar *errString; /* the string raising the error */ + int *errCounts; /* counters at the error state */ + int nbPush; +}; + +#define REGEXP_ALL_COUNTER 0x123456 +#define REGEXP_ALL_LAX_COUNTER 0x123457 + +static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top); +static void xmlRegFreeState(xmlRegStatePtr state); +static void xmlRegFreeAtom(xmlRegAtomPtr atom); +static int xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr); +static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint); +static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, + int neg, int start, int end, const xmlChar *blockName); + +static xmlAutomataPtr xmlNewAutomata(void); +static void xmlFreeAutomata(xmlAutomataPtr am); + +static int +xmlStrEqual (const xmlChar* s1, const xmlChar* s2) +{ + return strcmp ((const char*) (s1), (const char*) (s2)) == 0; +} + +static xmlChar* +xmlStrdup (const xmlChar* s) +{ + return (xmlChar*) (strdup ((const char*) (s))); +} + +static xmlChar* +xmlStrndup (const xmlChar* s, int len) +{ + unsigned int n = (unsigned int)len; + + char* r = (char*) REGEXP_MALLOC (n + 1); + + if (r != NULL) + { + memcpy (r, s, n); + r[n] = '\0'; + } + + return (xmlChar*)r; +} + +/** + * xmlStringCurrentChar: + * @ctxt: the XML parser context + * @cur: pointer to the beginning of the char + * @len: pointer to the length of the char read + * + * The current char value, if using UTF-8 this may actually span multiple + * bytes in the input buffer. + * + * Returns the current char value and its length + */ + +static int +xmlStringCurrentChar(const xmlChar * cur, int *len) +{ + /* + * We are supposed to handle UTF8, check it's valid + * From rfc2044: encoding of the Unicode values on UTF-8: + * + * UCS-4 range (hex.) UTF-8 octet sequence (binary) + * 0000 0000-0000 007F 0xxxxxxx + * 0000 0080-0000 07FF 110xxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + * + * Check for the 0x110000 limit too + */ + unsigned char c; + unsigned int val; + + c = *cur; + if (c & 0x80) + { + if ((cur[1] & 0xc0) != 0x80) + goto encoding_error; + if ((c & 0xe0) == 0xe0) + { + + if ((cur[2] & 0xc0) != 0x80) + goto encoding_error; + if ((c & 0xf0) == 0xf0) + { + if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80)) + goto encoding_error; + /* 4-byte code */ + *len = 4; + val = (cur[0] & 0x7) << 18; + val |= (cur[1] & 0x3f) << 12; + val |= (cur[2] & 0x3f) << 6; + val |= cur[3] & 0x3f; + } + else + { + /* 3-byte code */ + *len = 3; + val = (cur[0] & 0xf) << 12; + val |= (cur[1] & 0x3f) << 6; + val |= cur[2] & 0x3f; + } + } + else + { + /* 2-byte code */ + *len = 2; + val = (cur[0] & 0x1f) << 6; + val |= cur[1] & 0x3f; + } + + return (val); + } + else + { + /* 1-byte code */ + *len = 1; + return ((int) *cur); + } + + /* + * Assume it's a fixed length encoding (1) with + * a compatible encoding for the ASCII set, since + * XML constructs only use < 128 chars + */ + *len = 1; + return ((int) *cur); + +encoding_error: + + /* + * An encoding problem may arise from a truncated input buffer + * splitting a character in the middle. In that case do not raise + * an error but return 0 to endicate an end of stream problem + */ + *len = 0; + return(0); +} + +/************************************************************************ + * * + * Regexp memory error handler * + * * + ************************************************************************/ +/** + * xmlRegexpErrMemory: + * @extra: extra information + * + * Handle an out of memory condition + */ +static void +xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt, const char *extra) +{ + if (ctxt != NULL) { + ctxt->error = XML_REGEXP_ERROR_MEMORY; + ctxt->error_str = extra; + } +} + +/** + * xmlRegexpErrCompile: + * @extra: extra information + * + * Handle a compilation failure + */ +static void +xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra) +{ + if (ctxt != NULL) { + ctxt->error = XML_REGEXP_ERROR_COMPILE; + ctxt->error_str = extra; + } +} + +/************************************************************************ + * * + * Allocation/Deallocation * + * * + ************************************************************************/ + +static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt); +/** + * xmlRegEpxFromParse: + * @ctxt: the parser context used to build it + * + * Allocate a new regexp and fill it with the result from the parser + * + * Returns the new regexp or NULL in case of error + */ +static xmlRegexpPtr +xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) { + xmlRegexpPtr ret; + + ret = (xmlRegexpPtr) REGEXP_MALLOC(sizeof(xmlRegexp)); + if (ret == NULL) { + xmlRegexpErrMemory(ctxt, "compiling regexp"); + return(NULL); + } + memset(ret, 0, sizeof(xmlRegexp)); + ret->string = ctxt->string; + ret->nbStates = ctxt->nbStates; + ret->states = ctxt->states; + ret->nbAtoms = ctxt->nbAtoms; + ret->atoms = ctxt->atoms; + ret->nbCounters = ctxt->nbCounters; + ret->counters = ctxt->counters; + ret->determinist = ctxt->determinist; + ret->flags = ctxt->flags; + if (ret->determinist == -1) { + xmlRegexpIsDeterminist(ret); + } + + if ((ret->determinist != 0) && + (ret->nbCounters == 0) && + (ctxt->negs == 0) && + (ret->atoms != NULL) && + (ret->atoms[0] != NULL) && + (ret->atoms[0]->type == XML_REGEXP_STRING)) { + int i, j, nbstates = 0, nbatoms = 0; + int *stateRemap; + int *stringRemap; + int *transitions; + void **transdata; + xmlChar **stringMap; + xmlChar *value; + + /* + * Switch to a compact representation + * 1/ counting the effective number of states left + * 2/ counting the unique number of atoms, and check that + * they are all of the string type + * 3/ build a table state x atom for the transitions + */ + + stateRemap = REGEXP_MALLOC(ret->nbStates * sizeof(int)); + if (stateRemap == NULL) { + xmlRegexpErrMemory(ctxt, "compiling regexp"); + REGEXP_FREE(ret); + return(NULL); + } + for (i = 0;i < ret->nbStates;i++) { + if (ret->states[i] != NULL) { + stateRemap[i] = nbstates; + nbstates++; + } else { + stateRemap[i] = -1; + } + } +#ifdef DEBUG_COMPACTION + printf("Final: %d states\n", nbstates); +#endif + stringMap = REGEXP_MALLOC(ret->nbAtoms * sizeof(char *)); + if (stringMap == NULL) { + xmlRegexpErrMemory(ctxt, "compiling regexp"); + REGEXP_FREE(stateRemap); + REGEXP_FREE(ret); + return(NULL); + } + stringRemap = REGEXP_MALLOC(ret->nbAtoms * sizeof(int)); + if (stringRemap == NULL) { + xmlRegexpErrMemory(ctxt, "compiling regexp"); + REGEXP_FREE(stringMap); + REGEXP_FREE(stateRemap); + REGEXP_FREE(ret); + return(NULL); + } + for (i = 0;i < ret->nbAtoms;i++) { + if ((ret->atoms[i]->type == XML_REGEXP_STRING) && + (ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) { + value = ret->atoms[i]->valuep; + for (j = 0;j < nbatoms;j++) { + if (xmlStrEqual(stringMap[j], value)) { + stringRemap[i] = j; + break; + } + } + if (j >= nbatoms) { + stringRemap[i] = nbatoms; + stringMap[nbatoms] = xmlStrdup(value); + if (stringMap[nbatoms] == NULL) { + for (i = 0;i < nbatoms;i++) + REGEXP_FREE(stringMap[i]); + REGEXP_FREE(stringRemap); + REGEXP_FREE(stringMap); + REGEXP_FREE(stateRemap); + REGEXP_FREE(ret); + return(NULL); + } + nbatoms++; + } + } else { + REGEXP_FREE(stateRemap); + REGEXP_FREE(stringRemap); + for (i = 0;i < nbatoms;i++) + REGEXP_FREE(stringMap[i]); + REGEXP_FREE(stringMap); + REGEXP_FREE(ret); + return(NULL); + } + } +#ifdef DEBUG_COMPACTION + printf("Final: %d atoms\n", nbatoms); +#endif + transitions = (int *) REGEXP_MALLOC((nbstates + 1) * + (nbatoms + 1) * sizeof(int)); + if (transitions == NULL) { + REGEXP_FREE(stateRemap); + REGEXP_FREE(stringRemap); + REGEXP_FREE(stringMap); + REGEXP_FREE(ret); + return(NULL); + } + memset(transitions, 0, (nbstates + 1) * (nbatoms + 1) * sizeof(int)); + + /* + * Allocate the transition table. The first entry for each + * state corresponds to the state type. + */ + transdata = NULL; + + for (i = 0;i < ret->nbStates;i++) { + int stateno, atomno, targetno, prev; + xmlRegStatePtr state; + xmlRegTransPtr trans; + + stateno = stateRemap[i]; + if (stateno == -1) + continue; + state = ret->states[i]; + + transitions[stateno * (nbatoms + 1)] = state->type; + + for (j = 0;j < state->nbTrans;j++) { + trans = &(state->trans[j]); + if ((trans->to == -1) || (trans->atom == NULL)) + continue; + atomno = stringRemap[trans->atom->no]; + if ((trans->atom->data != NULL) && (transdata == NULL)) { + transdata = (void **) REGEXP_MALLOC(nbstates * nbatoms * + sizeof(void *)); + if (transdata != NULL) + memset(transdata, 0, + nbstates * nbatoms * sizeof(void *)); + else { + xmlRegexpErrMemory(ctxt, "compiling regexp"); + break; + } + } + targetno = stateRemap[trans->to]; + /* + * if the same atom can generate transitions to 2 different + * states then it means the automata is not determinist and + * the compact form can't be used ! + */ + prev = transitions[stateno * (nbatoms + 1) + atomno + 1]; + if (prev != 0) { + if (prev != targetno + 1) { + ret->determinist = 0; +#ifdef DEBUG_COMPACTION + printf("Indet: state %d trans %d, atom %d to %d : %d to %d\n", + i, j, trans->atom->no, trans->to, atomno, targetno); + printf(" previous to is %d\n", prev); +#endif + if (transdata != NULL) + REGEXP_FREE(transdata); + REGEXP_FREE(transitions); + REGEXP_FREE(stateRemap); + REGEXP_FREE(stringRemap); + for (i = 0;i < nbatoms;i++) + REGEXP_FREE(stringMap[i]); + REGEXP_FREE(stringMap); + goto not_determ; + } + } else { +#if 0 + printf("State %d trans %d: atom %d to %d : %d to %d\n", + i, j, trans->atom->no, trans->to, atomno, targetno); +#endif + transitions[stateno * (nbatoms + 1) + atomno + 1] = + targetno + 1; /* to avoid 0 */ + if (transdata != NULL) + transdata[stateno * nbatoms + atomno] = + trans->atom->data; + } + } + } + ret->determinist = 1; +#ifdef DEBUG_COMPACTION + /* + * Debug + */ + for (i = 0;i < nbstates;i++) { + for (j = 0;j < nbatoms + 1;j++) { + printf("%02d ", transitions[i * (nbatoms + 1) + j]); + } + printf("\n"); + } + printf("\n"); +#endif + /* + * Cleanup of the old data + */ + if (ret->states != NULL) { + for (i = 0;i < ret->nbStates;i++) + xmlRegFreeState(ret->states[i]); + REGEXP_FREE(ret->states); + } + ret->states = NULL; + ret->nbStates = 0; + if (ret->atoms != NULL) { + for (i = 0;i < ret->nbAtoms;i++) + xmlRegFreeAtom(ret->atoms[i]); + REGEXP_FREE(ret->atoms); + } + ret->atoms = NULL; + ret->nbAtoms = 0; + + ret->compact = transitions; + ret->transdata = transdata; + ret->stringMap = stringMap; + ret->nbstrings = nbatoms; + ret->nbstates = nbstates; + REGEXP_FREE(stateRemap); + REGEXP_FREE(stringRemap); + } +not_determ: + ctxt->string = NULL; + ctxt->nbStates = 0; + ctxt->states = NULL; + ctxt->nbAtoms = 0; + ctxt->atoms = NULL; + ctxt->nbCounters = 0; + ctxt->counters = NULL; + return(ret); +} + +/** + * xmlRegNewParserCtxt: + * @string: the string to parse + * + * Allocate a new regexp parser context + * + * Returns the new context or NULL in case of error + */ +static xmlRegParserCtxtPtr +xmlRegNewParserCtxt(const xmlChar *string) { + xmlRegParserCtxtPtr ret; + + ret = (xmlRegParserCtxtPtr) REGEXP_MALLOC(sizeof(xmlRegParserCtxt)); + if (ret == NULL) + return(NULL); + memset(ret, 0, sizeof(xmlRegParserCtxt)); + if (string != NULL) + ret->string = xmlStrdup(string); + ret->cur = ret->string; + ret->neg = 0; + ret->negs = 0; + ret->error = 0; + ret->determinist = -1; + return(ret); +} + +/** + * xmlRegNewRange: + * @ctxt: the regexp parser context + * @neg: is that negative + * @type: the type of range + * @start: the start codepoint + * @end: the end codepoint + * + * Allocate a new regexp range + * + * Returns the new range or NULL in case of error + */ +static xmlRegRangePtr +xmlRegNewRange(xmlRegParserCtxtPtr ctxt, + int neg, xmlRegAtomType type, int start, int end) { + xmlRegRangePtr ret; + + ret = (xmlRegRangePtr) REGEXP_MALLOC(sizeof(xmlRegRange)); + if (ret == NULL) { + xmlRegexpErrMemory(ctxt, "allocating range"); + return(NULL); + } + ret->neg = neg; + ret->type = type; + ret->start = start; + ret->end = end; + return(ret); +} + +/** + * xmlRegFreeRange: + * @range: the regexp range + * + * Free a regexp range + */ +static void +xmlRegFreeRange(xmlRegRangePtr range) { + if (range == NULL) + return; + + if (range->blockName != NULL) + REGEXP_FREE(range->blockName); + REGEXP_FREE(range); +} + +/** + * xmlRegCopyRange: + * @range: the regexp range + * + * Copy a regexp range + * + * Returns the new copy or NULL in case of error. + */ +static xmlRegRangePtr +xmlRegCopyRange(xmlRegParserCtxtPtr ctxt, xmlRegRangePtr range) { + xmlRegRangePtr ret; + + if (range == NULL) + return(NULL); + + ret = xmlRegNewRange(ctxt, range->neg, range->type, range->start, + range->end); + if (ret == NULL) + return(NULL); + if (range->blockName != NULL) { + ret->blockName = xmlStrdup(range->blockName); + if (ret->blockName == NULL) { + xmlRegexpErrMemory(ctxt, "allocating range"); + xmlRegFreeRange(ret); + return(NULL); + } + } + return(ret); +} + +/** + * xmlRegNewAtom: + * @ctxt: the regexp parser context + * @type: the type of atom + * + * Allocate a new atom + * + * Returns the new atom or NULL in case of error + */ +static xmlRegAtomPtr +xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) { + xmlRegAtomPtr ret; + + ret = (xmlRegAtomPtr) REGEXP_MALLOC(sizeof(xmlRegAtom)); + if (ret == NULL) { + xmlRegexpErrMemory(ctxt, "allocating atom"); + return(NULL); + } + memset(ret, 0, sizeof(xmlRegAtom)); + ret->type = type; + ret->quant = XML_REGEXP_QUANT_ONCE; + ret->min = 0; + ret->max = 0; + return(ret); +} + +/** + * xmlRegFreeAtom: + * @atom: the regexp atom + * + * Free a regexp atom + */ +static void +xmlRegFreeAtom(xmlRegAtomPtr atom) { + int i; + + if (atom == NULL) + return; + + for (i = 0;i < atom->nbRanges;i++) + xmlRegFreeRange(atom->ranges[i]); + if (atom->ranges != NULL) + REGEXP_FREE(atom->ranges); + if ((atom->type == XML_REGEXP_STRING) && (atom->valuep != NULL)) + REGEXP_FREE(atom->valuep); + if ((atom->type == XML_REGEXP_STRING) && (atom->valuep2 != NULL)) + REGEXP_FREE(atom->valuep2); + if ((atom->type == XML_REGEXP_BLOCK_NAME) && (atom->valuep != NULL)) + REGEXP_FREE(atom->valuep); + REGEXP_FREE(atom); +} + +/** + * xmlRegCopyAtom: + * @ctxt: the regexp parser context + * @atom: the oiginal atom + * + * Allocate a new regexp range + * + * Returns the new atom or NULL in case of error + */ +static xmlRegAtomPtr +xmlRegCopyAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) { + xmlRegAtomPtr ret; + + ret = (xmlRegAtomPtr) REGEXP_MALLOC(sizeof(xmlRegAtom)); + if (ret == NULL) { + xmlRegexpErrMemory(ctxt, "copying atom"); + return(NULL); + } + memset(ret, 0, sizeof(xmlRegAtom)); + ret->type = atom->type; + ret->quant = atom->quant; + ret->min = atom->min; + ret->max = atom->max; + if (atom->nbRanges > 0) { + int i; + + ret->ranges = (xmlRegRangePtr *) REGEXP_MALLOC(sizeof(xmlRegRangePtr) * + atom->nbRanges); + if (ret->ranges == NULL) { + xmlRegexpErrMemory(ctxt, "copying atom"); + goto error; + } + for (i = 0;i < atom->nbRanges;i++) { + ret->ranges[i] = xmlRegCopyRange(ctxt, atom->ranges[i]); + if (ret->ranges[i] == NULL) + goto error; + ret->nbRanges = i + 1; + } + } + return(ret); + +error: + xmlRegFreeAtom(ret); + return(NULL); +} + +static xmlRegStatePtr +xmlRegNewState(xmlRegParserCtxtPtr ctxt) { + xmlRegStatePtr ret; + + ret = (xmlRegStatePtr) REGEXP_MALLOC(sizeof(xmlRegState)); + if (ret == NULL) { + xmlRegexpErrMemory(ctxt, "allocating state"); + return(NULL); + } + memset(ret, 0, sizeof(xmlRegState)); + ret->type = XML_REGEXP_TRANS_STATE; + ret->mark = XML_REGEXP_MARK_NORMAL; + return(ret); +} + +/** + * xmlRegFreeState: + * @state: the regexp state + * + * Free a regexp state + */ +static void +xmlRegFreeState(xmlRegStatePtr state) { + if (state == NULL) + return; + + if (state->trans != NULL) + REGEXP_FREE(state->trans); + if (state->transTo != NULL) + REGEXP_FREE(state->transTo); + REGEXP_FREE(state); +} + +/** + * xmlRegFreeParserCtxt: + * @ctxt: the regexp parser context + * + * Free a regexp parser context + */ +static void +xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) { + int i; + if (ctxt == NULL) + return; + + if (ctxt->string != NULL) + REGEXP_FREE(ctxt->string); + if (ctxt->states != NULL) { + for (i = 0;i < ctxt->nbStates;i++) + xmlRegFreeState(ctxt->states[i]); + REGEXP_FREE(ctxt->states); + } + if (ctxt->atoms != NULL) { + for (i = 0;i < ctxt->nbAtoms;i++) + xmlRegFreeAtom(ctxt->atoms[i]); + REGEXP_FREE(ctxt->atoms); + } + if (ctxt->counters != NULL) + REGEXP_FREE(ctxt->counters); + REGEXP_FREE(ctxt); +} + +/************************************************************************ + * * + * Finite Automata structures manipulations * + * * + ************************************************************************/ + +static void +xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom, + int neg, xmlRegAtomType type, int start, int end, + xmlChar *blockName) { + xmlRegRangePtr range; + + if (atom == NULL) { + REGEXP_ERROR("add range: atom is NULL"); + return; + } + if (atom->type != XML_REGEXP_RANGES) { + REGEXP_ERROR("add range: atom is not ranges"); + return; + } + if (atom->maxRanges == 0) { + atom->maxRanges = 4; + atom->ranges = (xmlRegRangePtr *) REGEXP_MALLOC(atom->maxRanges * + sizeof(xmlRegRangePtr)); + if (atom->ranges == NULL) { + xmlRegexpErrMemory(ctxt, "adding ranges"); + atom->maxRanges = 0; + return; + } + } else if (atom->nbRanges >= atom->maxRanges) { + xmlRegRangePtr *tmp; + atom->maxRanges *= 2; + tmp = (xmlRegRangePtr *) REGEXP_REALLOC(atom->ranges, atom->maxRanges * + sizeof(xmlRegRangePtr)); + if (tmp == NULL) { + xmlRegexpErrMemory(ctxt, "adding ranges"); + atom->maxRanges /= 2; + return; + } + atom->ranges = tmp; + } + range = xmlRegNewRange(ctxt, neg, type, start, end); + if (range == NULL) + return; + range->blockName = blockName; + atom->ranges[atom->nbRanges++] = range; + +} + +static int +xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) { + if (ctxt->maxCounters == 0) { + ctxt->maxCounters = 4; + ctxt->counters = (xmlRegCounter *) REGEXP_MALLOC(ctxt->maxCounters * + sizeof(xmlRegCounter)); + if (ctxt->counters == NULL) { + xmlRegexpErrMemory(ctxt, "allocating counter"); + ctxt->maxCounters = 0; + return(-1); + } + } else if (ctxt->nbCounters >= ctxt->maxCounters) { + xmlRegCounter *tmp; + ctxt->maxCounters *= 2; + tmp = (xmlRegCounter *) REGEXP_REALLOC(ctxt->counters, ctxt->maxCounters * + sizeof(xmlRegCounter)); + if (tmp == NULL) { + xmlRegexpErrMemory(ctxt, "allocating counter"); + ctxt->maxCounters /= 2; + return(-1); + } + ctxt->counters = tmp; + } + ctxt->counters[ctxt->nbCounters].min = -1; + ctxt->counters[ctxt->nbCounters].max = -1; + return(ctxt->nbCounters++); +} + +static int +xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) { + if (atom == NULL) { + REGEXP_ERROR("atom push: atom is NULL"); + return(-1); + } + if (ctxt->maxAtoms == 0) { + ctxt->maxAtoms = 4; + ctxt->atoms = (xmlRegAtomPtr *) REGEXP_MALLOC(ctxt->maxAtoms * + sizeof(xmlRegAtomPtr)); + if (ctxt->atoms == NULL) { + xmlRegexpErrMemory(ctxt, "pushing atom"); + ctxt->maxAtoms = 0; + return(-1); + } + } else if (ctxt->nbAtoms >= ctxt->maxAtoms) { + xmlRegAtomPtr *tmp; + ctxt->maxAtoms *= 2; + tmp = (xmlRegAtomPtr *) REGEXP_REALLOC(ctxt->atoms, ctxt->maxAtoms * + sizeof(xmlRegAtomPtr)); + if (tmp == NULL) { + xmlRegexpErrMemory(ctxt, "allocating counter"); + ctxt->maxAtoms /= 2; + return(-1); + } + ctxt->atoms = tmp; + } + atom->no = ctxt->nbAtoms; + ctxt->atoms[ctxt->nbAtoms++] = atom; + return(0); +} + +static void +xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target, + int from) { + if (target->maxTransTo == 0) { + target->maxTransTo = 8; + target->transTo = (int *) REGEXP_MALLOC(target->maxTransTo * + sizeof(int)); + if (target->transTo == NULL) { + xmlRegexpErrMemory(ctxt, "adding transition"); + target->maxTransTo = 0; + return; + } + } else if (target->nbTransTo >= target->maxTransTo) { + int *tmp; + target->maxTransTo *= 2; + tmp = (int *) REGEXP_REALLOC(target->transTo, target->maxTransTo * + sizeof(int)); + if (tmp == NULL) { + xmlRegexpErrMemory(ctxt, "adding transition"); + target->maxTransTo /= 2; + return; + } + target->transTo = tmp; + } + target->transTo[target->nbTransTo] = from; + target->nbTransTo++; +} + +static void +xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, + xmlRegAtomPtr atom, xmlRegStatePtr target, + int counter, int count) { + + int nrtrans; + + if (state == NULL) { + REGEXP_ERROR("add state: state is NULL"); + return; + } + if (target == NULL) { + REGEXP_ERROR("add state: target is NULL"); + return; + } + /* + * Other routines follow the philosophy 'When in doubt, add a transition' + * so we check here whether such a transition is already present and, if + * so, silently ignore this request. + */ + + for (nrtrans = state->nbTrans - 1; nrtrans >= 0; nrtrans--) { + xmlRegTransPtr trans = &(state->trans[nrtrans]); + if ((trans->atom == atom) && + (trans->to == target->no) && + (trans->counter == counter) && + (trans->count == count)) { +#ifdef DEBUG_REGEXP_GRAPH + printf("Ignoring duplicate transition from %d to %d\n", + state->no, target->no); +#endif + return; + } + } + + if (state->maxTrans == 0) { + state->maxTrans = 8; + state->trans = (xmlRegTrans *) REGEXP_MALLOC(state->maxTrans * + sizeof(xmlRegTrans)); + if (state->trans == NULL) { + xmlRegexpErrMemory(ctxt, "adding transition"); + state->maxTrans = 0; + return; + } + } else if (state->nbTrans >= state->maxTrans) { + xmlRegTrans *tmp; + state->maxTrans *= 2; + tmp = (xmlRegTrans *) REGEXP_REALLOC(state->trans, state->maxTrans * + sizeof(xmlRegTrans)); + if (tmp == NULL) { + xmlRegexpErrMemory(ctxt, "adding transition"); + state->maxTrans /= 2; + return; + } + state->trans = tmp; + } +#ifdef DEBUG_REGEXP_GRAPH + printf("Add trans from %d to %d ", state->no, target->no); + if (count == REGEXP_ALL_COUNTER) + printf("all transition\n"); + else if (count >= 0) + printf("count based %d\n", count); + else if (counter >= 0) + printf("counted %d\n", counter); + else if (atom == NULL) + printf("epsilon transition\n"); + else if (atom != NULL) + xmlRegPrintAtom(stdout, atom); +#endif + + state->trans[state->nbTrans].atom = atom; + state->trans[state->nbTrans].to = target->no; + state->trans[state->nbTrans].counter = counter; + state->trans[state->nbTrans].count = count; + state->trans[state->nbTrans].nd = 0; + state->nbTrans++; + xmlRegStateAddTransTo(ctxt, target, state->no); +} + +static int +xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) { + if (state == NULL) return(-1); + if (ctxt->maxStates == 0) { + ctxt->maxStates = 4; + ctxt->states = (xmlRegStatePtr *) REGEXP_MALLOC(ctxt->maxStates * + sizeof(xmlRegStatePtr)); + if (ctxt->states == NULL) { + xmlRegexpErrMemory(ctxt, "adding state"); + ctxt->maxStates = 0; + return(-1); + } + } else if (ctxt->nbStates >= ctxt->maxStates) { + xmlRegStatePtr *tmp; + ctxt->maxStates *= 2; + tmp = (xmlRegStatePtr *) REGEXP_REALLOC(ctxt->states, ctxt->maxStates * + sizeof(xmlRegStatePtr)); + if (tmp == NULL) { + xmlRegexpErrMemory(ctxt, "adding state"); + ctxt->maxStates /= 2; + return(-1); + } + ctxt->states = tmp; + } + state->no = ctxt->nbStates; + ctxt->states[ctxt->nbStates++] = state; + return(0); +} + +/** + * xmlFAGenerateEpsilonTransition: + * @ctxt: a regexp parser context + * @from: the from state + * @to: the target state or NULL for building a new one + * + */ +static void +xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt, + xmlRegStatePtr from, xmlRegStatePtr to) { + if (to == NULL) { + to = xmlRegNewState(ctxt); + xmlRegStatePush(ctxt, to); + ctxt->state = to; + } + xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1); +} + +/** + * xmlFAGenerateCountedEpsilonTransition: + * @ctxt: a regexp parser context + * @from: the from state + * @to: the target state or NULL for building a new one + * counter: the counter for that transition + * + */ +static void +xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt, + xmlRegStatePtr from, xmlRegStatePtr to, int counter) { + if (to == NULL) { + to = xmlRegNewState(ctxt); + xmlRegStatePush(ctxt, to); + ctxt->state = to; + } + xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1); +} + +/** + * xmlFAGenerateCountedTransition: + * @ctxt: a regexp parser context + * @from: the from state + * @to: the target state or NULL for building a new one + * counter: the counter for that transition + * + */ +static void +xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt, + xmlRegStatePtr from, xmlRegStatePtr to, int counter) { + if (to == NULL) { + to = xmlRegNewState(ctxt); + xmlRegStatePush(ctxt, to); + ctxt->state = to; + } + xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter); +} + +/** + * xmlFAGenerateTransitions: + * @ctxt: a regexp parser context + * @from: the from state + * @to: the target state or NULL for building a new one + * @atom: the atom generating the transition + * + * Returns 0 if success and -1 in case of error. + */ +static int +xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from, + xmlRegStatePtr to, xmlRegAtomPtr atom) { + xmlRegStatePtr end; + + if (atom == NULL) { + REGEXP_ERROR("genrate transition: atom == NULL"); + return(-1); + } + if (atom->type == XML_REGEXP_SUBREG) { + /* + * this is a subexpression handling one should not need to + * create a new node except for XML_REGEXP_QUANT_RANGE. + */ + if (xmlRegAtomPush(ctxt, atom) < 0) { + return(-1); + } + if ((to != NULL) && (atom->stop != to) && + (atom->quant != XML_REGEXP_QUANT_RANGE)) { + /* + * Generate an epsilon transition to link to the target + */ + xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to); +#ifdef DV + } else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) && + (atom->quant != XML_REGEXP_QUANT_ONCE)) { + to = xmlRegNewState(ctxt); + xmlRegStatePush(ctxt, to); + ctxt->state = to; + xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to); +#endif + } + switch (atom->quant) { + case XML_REGEXP_QUANT_OPT: + atom->quant = XML_REGEXP_QUANT_ONCE; + /* + * transition done to the state after end of atom. + * 1. set transition from atom start to new state + * 2. set transition from atom end to this state. + */ + if (to == NULL) { + xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0); + xmlFAGenerateEpsilonTransition(ctxt, atom->stop, + ctxt->state); + } else { + xmlFAGenerateEpsilonTransition(ctxt, atom->start, to); + } + break; + case XML_REGEXP_QUANT_MULT: + atom->quant = XML_REGEXP_QUANT_ONCE; + xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop); + xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start); + break; + case XML_REGEXP_QUANT_PLUS: + atom->quant = XML_REGEXP_QUANT_ONCE; + xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start); + break; + case XML_REGEXP_QUANT_RANGE: { + int counter; + xmlRegStatePtr inter, newstate; + + /* + * create the final state now if needed + */ + if (to != NULL) { + newstate = to; + } else { + newstate = xmlRegNewState(ctxt); + xmlRegStatePush(ctxt, newstate); + } + + /* + * The principle here is to use counted transition + * to avoid explosion in the number of states in the + * graph. This is clearly more complex but should not + * be exploitable at runtime. + */ + if ((atom->min == 0) && (atom->start0 == NULL)) { + xmlRegAtomPtr copy; + /* + * duplicate a transition based on atom to count next + * occurences after 1. We cannot loop to atom->start + * directly because we need an epsilon transition to + * newstate. + */ + /* ???? For some reason it seems we never reach that + case, I suppose this got optimized out before when + building the automata */ + copy = xmlRegCopyAtom(ctxt, atom); + if (copy == NULL) + return(-1); + copy->quant = XML_REGEXP_QUANT_ONCE; + copy->min = 0; + copy->max = 0; + + if (xmlFAGenerateTransitions(ctxt, atom->start, NULL, copy) + < 0) + return(-1); + inter = ctxt->state; + counter = xmlRegGetCounter(ctxt); + ctxt->counters[counter].min = atom->min - 1; + ctxt->counters[counter].max = atom->max - 1; + /* count the number of times we see it again */ + xmlFAGenerateCountedEpsilonTransition(ctxt, inter, + atom->stop, counter); + /* allow a way out based on the count */ + xmlFAGenerateCountedTransition(ctxt, inter, + newstate, counter); + /* and also allow a direct exit for 0 */ + xmlFAGenerateEpsilonTransition(ctxt, atom->start, + newstate); + } else { + /* + * either we need the atom at least once or there + * is an atom->start0 allowing to easilly plug the + * epsilon transition. + */ + counter = xmlRegGetCounter(ctxt); + ctxt->counters[counter].min = atom->min - 1; + ctxt->counters[counter].max = atom->max - 1; + /* count the number of times we see it again */ + xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop, + atom->start, counter); + /* allow a way out based on the count */ + xmlFAGenerateCountedTransition(ctxt, atom->stop, + newstate, counter); + /* and if needed allow a direct exit for 0 */ + if (atom->min == 0) + xmlFAGenerateEpsilonTransition(ctxt, atom->start0, + newstate); + + } + atom->min = 0; + atom->max = 0; + atom->quant = XML_REGEXP_QUANT_ONCE; + ctxt->state = newstate; + } + default: + break; + } + return(0); + } + if ((atom->min == 0) && (atom->max == 0) && + (atom->quant == XML_REGEXP_QUANT_RANGE)) { + /* + * we can discard the atom and generate an epsilon transition instead + */ + if (to == NULL) { + to = xmlRegNewState(ctxt); + if (to != NULL) + xmlRegStatePush(ctxt, to); + else { + return(-1); + } + } + xmlFAGenerateEpsilonTransition(ctxt, from, to); + ctxt->state = to; + xmlRegFreeAtom(atom); + return(0); + } + if (to == NULL) { + to = xmlRegNewState(ctxt); + if (to != NULL) + xmlRegStatePush(ctxt, to); + else { + return(-1); + } + } + end = to; + if ((atom->quant == XML_REGEXP_QUANT_MULT) || + (atom->quant == XML_REGEXP_QUANT_PLUS)) { + /* + * Do not pollute the target state by adding transitions from + * it as it is likely to be the shared target of multiple branches. + * So isolate with an epsilon transition. + */ + xmlRegStatePtr tmp; + + tmp = xmlRegNewState(ctxt); + if (tmp != NULL) + xmlRegStatePush(ctxt, tmp); + else { + return(-1); + } + xmlFAGenerateEpsilonTransition(ctxt, tmp, to); + to = tmp; + } + if (xmlRegAtomPush(ctxt, atom) < 0) { + return(-1); + } + xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1); + ctxt->state = end; + switch (atom->quant) { + case XML_REGEXP_QUANT_OPT: + atom->quant = XML_REGEXP_QUANT_ONCE; + xmlFAGenerateEpsilonTransition(ctxt, from, to); + break; + case XML_REGEXP_QUANT_MULT: + atom->quant = XML_REGEXP_QUANT_ONCE; + xmlFAGenerateEpsilonTransition(ctxt, from, to); + xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1); + break; + case XML_REGEXP_QUANT_PLUS: + atom->quant = XML_REGEXP_QUANT_ONCE; + xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1); + break; + case XML_REGEXP_QUANT_RANGE: +#if DV_test + if (atom->min == 0) { + xmlFAGenerateEpsilonTransition(ctxt, from, to); + } +#endif + break; + default: + break; + } + return(0); +} + +/** + * xmlFAReduceEpsilonTransitions: + * @ctxt: a regexp parser context + * @fromnr: the from state + * @tonr: the to state + * @counter: should that transition be associated to a counted + * + */ +static void +xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr, + int tonr, int counter) { + int transnr; + xmlRegStatePtr from; + xmlRegStatePtr to; + +#ifdef DEBUG_REGEXP_GRAPH + printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr); +#endif + from = ctxt->states[fromnr]; + if (from == NULL) + return; + to = ctxt->states[tonr]; + if (to == NULL) + return; + if ((to->mark == XML_REGEXP_MARK_START) || + (to->mark == XML_REGEXP_MARK_VISITED)) + return; + + to->mark = XML_REGEXP_MARK_VISITED; + if (to->type == XML_REGEXP_FINAL_STATE) { +#ifdef DEBUG_REGEXP_GRAPH + printf("State %d is final, so %d becomes final\n", tonr, fromnr); +#endif + from->type = XML_REGEXP_FINAL_STATE; + } + for (transnr = 0;transnr < to->nbTrans;transnr++) { + if (to->trans[transnr].to < 0) + continue; + if (to->trans[transnr].atom == NULL) { + /* + * Don't remove counted transitions + * Don't loop either + */ + if (to->trans[transnr].to != fromnr) { + if (to->trans[transnr].count >= 0) { + int newto = to->trans[transnr].to; + + xmlRegStateAddTrans(ctxt, from, NULL, + ctxt->states[newto], + -1, to->trans[transnr].count); + } else { +#ifdef DEBUG_REGEXP_GRAPH + printf("Found epsilon trans %d from %d to %d\n", + transnr, tonr, to->trans[transnr].to); +#endif + if (to->trans[transnr].counter >= 0) { + xmlFAReduceEpsilonTransitions(ctxt, fromnr, + to->trans[transnr].to, + to->trans[transnr].counter); + } else { + xmlFAReduceEpsilonTransitions(ctxt, fromnr, + to->trans[transnr].to, + counter); + } + } + } + } else { + int newto = to->trans[transnr].to; + + if (to->trans[transnr].counter >= 0) { + xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, + ctxt->states[newto], + to->trans[transnr].counter, -1); + } else { + xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, + ctxt->states[newto], counter, -1); + } + } + } + to->mark = XML_REGEXP_MARK_NORMAL; +} + +/** + * xmlFAEliminateSimpleEpsilonTransitions: + * @ctxt: a regexp parser context + * + * Eliminating general epsilon transitions can get costly in the general + * algorithm due to the large amount of generated new transitions and + * associated comparisons. However for simple epsilon transition used just + * to separate building blocks when generating the automata this can be + * reduced to state elimination: + * - if there exists an epsilon from X to Y + * - if there is no other transition from X + * then X and Y are semantically equivalent and X can be eliminated + * If X is the start state then make Y the start state, else replace the + * target of all transitions to X by transitions to Y. + */ +static void +xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) { + int statenr, i, j, newto; + xmlRegStatePtr state, tmp; + + for (statenr = 0;statenr < ctxt->nbStates;statenr++) { + state = ctxt->states[statenr]; + if (state == NULL) + continue; + if (state->nbTrans != 1) + continue; + if (state->type == XML_REGEXP_UNREACH_STATE) + continue; + /* is the only transition out a basic transition */ + if ((state->trans[0].atom == NULL) && + (state->trans[0].to >= 0) && + (state->trans[0].to != statenr) && + (state->trans[0].counter < 0) && + (state->trans[0].count < 0)) { + newto = state->trans[0].to; + + if (state->type == XML_REGEXP_START_STATE) { +#ifdef DEBUG_REGEXP_GRAPH + printf("Found simple epsilon trans from start %d to %d\n", + statenr, newto); +#endif + } else { +#ifdef DEBUG_REGEXP_GRAPH + printf("Found simple epsilon trans from %d to %d\n", + statenr, newto); +#endif + for (i = 0;i < state->nbTransTo;i++) { + tmp = ctxt->states[state->transTo[i]]; + for (j = 0;j < tmp->nbTrans;j++) { + if (tmp->trans[j].to == statenr) { +#ifdef DEBUG_REGEXP_GRAPH + printf("Changed transition %d on %d to go to %d\n", + j, tmp->no, newto); +#endif + tmp->trans[j].to = -1; + xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom, + ctxt->states[newto], + tmp->trans[j].counter, + tmp->trans[j].count); + } + } + } + if (state->type == XML_REGEXP_FINAL_STATE) + ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE; + /* eliminate the transition completely */ + state->nbTrans = 0; + + state->type = XML_REGEXP_UNREACH_STATE; + + } + + } + } +} +/** + * xmlFAEliminateEpsilonTransitions: + * @ctxt: a regexp parser context + * + */ +static void +xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) { + int statenr, transnr; + xmlRegStatePtr state; + int has_epsilon; + + if (ctxt->states == NULL) return; + + /* + * Eliminate simple epsilon transition and the associated unreachable + * states. + */ + xmlFAEliminateSimpleEpsilonTransitions(ctxt); + for (statenr = 0;statenr < ctxt->nbStates;statenr++) { + state = ctxt->states[statenr]; + if ((state != NULL) && (state->type == XML_REGEXP_UNREACH_STATE)) { +#ifdef DEBUG_REGEXP_GRAPH + printf("Removed unreachable state %d\n", statenr); +#endif + xmlRegFreeState(state); + ctxt->states[statenr] = NULL; + } + } + + has_epsilon = 0; + + /* + * Build the completed transitions bypassing the epsilons + * Use a marking algorithm to avoid loops + * Mark sink states too. + * Process from the latests states backward to the start when + * there is long cascading epsilon chains this minimize the + * recursions and transition compares when adding the new ones + */ + for (statenr = ctxt->nbStates - 1;statenr >= 0;statenr--) { + state = ctxt->states[statenr]; + if (state == NULL) + continue; + if ((state->nbTrans == 0) && + (state->type != XML_REGEXP_FINAL_STATE)) { + state->type = XML_REGEXP_SINK_STATE; + } + for (transnr = 0;transnr < state->nbTrans;transnr++) { + if ((state->trans[transnr].atom == NULL) && + (state->trans[transnr].to >= 0)) { + if (state->trans[transnr].to == statenr) { + state->trans[transnr].to = -1; +#ifdef DEBUG_REGEXP_GRAPH + printf("Removed loopback epsilon trans %d on %d\n", + transnr, statenr); +#endif + } else if (state->trans[transnr].count < 0) { + int newto = state->trans[transnr].to; + +#ifdef DEBUG_REGEXP_GRAPH + printf("Found epsilon trans %d from %d to %d\n", + transnr, statenr, newto); +#endif + has_epsilon = 1; + state->trans[transnr].to = -2; + state->mark = XML_REGEXP_MARK_START; + xmlFAReduceEpsilonTransitions(ctxt, statenr, + newto, state->trans[transnr].counter); + state->mark = XML_REGEXP_MARK_NORMAL; +#ifdef DEBUG_REGEXP_GRAPH + } else { + printf("Found counted transition %d on %d\n", + transnr, statenr); +#endif + } + } + } + } + /* + * Eliminate the epsilon transitions + */ + if (has_epsilon) { + for (statenr = 0;statenr < ctxt->nbStates;statenr++) { + state = ctxt->states[statenr]; + if (state == NULL) + continue; + for (transnr = 0;transnr < state->nbTrans;transnr++) { + xmlRegTransPtr trans = &(state->trans[transnr]); + if ((trans->atom == NULL) && + (trans->count < 0) && + (trans->to >= 0)) { + trans->to = -1; + } + } + } + } + + /* + * Use this pass to detect unreachable states too + */ + for (statenr = 0;statenr < ctxt->nbStates;statenr++) { + state = ctxt->states[statenr]; + if (state != NULL) + state->reached = XML_REGEXP_MARK_NORMAL; + } + state = ctxt->states[0]; + if (state != NULL) + state->reached = XML_REGEXP_MARK_START; + while (state != NULL) { + xmlRegStatePtr target = NULL; + state->reached = XML_REGEXP_MARK_VISITED; + /* + * Mark all states reachable from the current reachable state + */ + for (transnr = 0;transnr < state->nbTrans;transnr++) { + if ((state->trans[transnr].to >= 0) && + ((state->trans[transnr].atom != NULL) || + (state->trans[transnr].count >= 0))) { + int newto = state->trans[transnr].to; + + if (ctxt->states[newto] == NULL) + continue; + if (ctxt->states[newto]->reached == XML_REGEXP_MARK_NORMAL) { + ctxt->states[newto]->reached = XML_REGEXP_MARK_START; + target = ctxt->states[newto]; + } + } + } + + /* + * find the next accessible state not explored + */ + if (target == NULL) { + for (statenr = 1;statenr < ctxt->nbStates;statenr++) { + state = ctxt->states[statenr]; + if ((state != NULL) && (state->reached == + XML_REGEXP_MARK_START)) { + target = state; + break; + } + } + } + state = target; + } + for (statenr = 0;statenr < ctxt->nbStates;statenr++) { + state = ctxt->states[statenr]; + if ((state != NULL) && (state->reached == XML_REGEXP_MARK_NORMAL)) { +#ifdef DEBUG_REGEXP_GRAPH + printf("Removed unreachable state %d\n", statenr); +#endif + xmlRegFreeState(state); + ctxt->states[statenr] = NULL; + } + } + +} + +static int +xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) { + int ret = 0; + + if ((range1->type == XML_REGEXP_RANGES) || + (range2->type == XML_REGEXP_RANGES) || + (range2->type == XML_REGEXP_SUBREG) || + (range1->type == XML_REGEXP_SUBREG) || + (range1->type == XML_REGEXP_STRING) || + (range2->type == XML_REGEXP_STRING)) + return(-1); + + /* put them in order */ + if (range1->type > range2->type) { + xmlRegRangePtr tmp; + + tmp = range1; + range1 = range2; + range2 = tmp; + } + if ((range1->type == XML_REGEXP_ANYCHAR) || + (range2->type == XML_REGEXP_ANYCHAR)) { + ret = 1; + } else if ((range1->type == XML_REGEXP_EPSILON) || + (range2->type == XML_REGEXP_EPSILON)) { + return(0); + } else if (range1->type == range2->type) { + if (range1->type != XML_REGEXP_CHARVAL) + ret = 1; + else if ((range1->end < range2->start) || + (range2->end < range1->start)) + ret = 0; + else + ret = 1; + } else if (range1->type == XML_REGEXP_CHARVAL) { + int codepoint; + int neg = 0; + + /* + * just check all codepoints in the range for acceptance, + * this is usually way cheaper since done only once at + * compilation than testing over and over at runtime or + * pushing too many states when evaluating. + */ + if (((range1->neg == 0) && (range2->neg != 0)) || + ((range1->neg != 0) && (range2->neg == 0))) + neg = 1; + + for (codepoint = range1->start;codepoint <= range1->end ;codepoint++) { + ret = xmlRegCheckCharacterRange(range2->type, codepoint, + 0, range2->start, range2->end, + range2->blockName); + if (ret < 0) + return(-1); + if (((neg == 1) && (ret == 0)) || + ((neg == 0) && (ret == 1))) + return(1); + } + return(0); + } else if ((range1->type == XML_REGEXP_BLOCK_NAME) || + (range2->type == XML_REGEXP_BLOCK_NAME)) { + if (range1->type == range2->type) { + ret = xmlStrEqual(range1->blockName, range2->blockName); + } else { + /* + * comparing a block range with anything else is way + * too costly, and maintining the table is like too much + * memory too, so let's force the automata to save state + * here. + */ + return(1); + } + } else if ((range1->type < XML_REGEXP_LETTER) || + (range2->type < XML_REGEXP_LETTER)) { + if ((range1->type == XML_REGEXP_ANYSPACE) && + (range2->type == XML_REGEXP_NOTSPACE)) + ret = 0; + else if ((range1->type == XML_REGEXP_INITNAME) && + (range2->type == XML_REGEXP_NOTINITNAME)) + ret = 0; + else if ((range1->type == XML_REGEXP_NAMECHAR) && + (range2->type == XML_REGEXP_NOTNAMECHAR)) + ret = 0; + else if ((range1->type == XML_REGEXP_DECIMAL) && + (range2->type == XML_REGEXP_NOTDECIMAL)) + ret = 0; + else if ((range1->type == XML_REGEXP_REALCHAR) && + (range2->type == XML_REGEXP_NOTREALCHAR)) + ret = 0; + else { + /* same thing to limit complexity */ + return(1); + } + } else { + ret = 0; + /* range1->type < range2->type here */ + switch (range1->type) { + case XML_REGEXP_LETTER: + /* all disjoint except in the subgroups */ + if ((range2->type == XML_REGEXP_LETTER_UPPERCASE) || + (range2->type == XML_REGEXP_LETTER_LOWERCASE) || + (range2->type == XML_REGEXP_LETTER_TITLECASE) || + (range2->type == XML_REGEXP_LETTER_MODIFIER) || + (range2->type == XML_REGEXP_LETTER_OTHERS)) + ret = 1; + break; + case XML_REGEXP_MARK: + if ((range2->type == XML_REGEXP_MARK_NONSPACING) || + (range2->type == XML_REGEXP_MARK_SPACECOMBINING) || + (range2->type == XML_REGEXP_MARK_ENCLOSING)) + ret = 1; + break; + case XML_REGEXP_NUMBER: + if ((range2->type == XML_REGEXP_NUMBER_DECIMAL) || + (range2->type == XML_REGEXP_NUMBER_LETTER) || + (range2->type == XML_REGEXP_NUMBER_OTHERS)) + ret = 1; + break; + case XML_REGEXP_PUNCT: + if ((range2->type == XML_REGEXP_PUNCT_CONNECTOR) || + (range2->type == XML_REGEXP_PUNCT_DASH) || + (range2->type == XML_REGEXP_PUNCT_OPEN) || + (range2->type == XML_REGEXP_PUNCT_CLOSE) || + (range2->type == XML_REGEXP_PUNCT_INITQUOTE) || + (range2->type == XML_REGEXP_PUNCT_FINQUOTE) || + (range2->type == XML_REGEXP_PUNCT_OTHERS)) + ret = 1; + break; + case XML_REGEXP_SEPAR: + if ((range2->type == XML_REGEXP_SEPAR_SPACE) || + (range2->type == XML_REGEXP_SEPAR_LINE) || + (range2->type == XML_REGEXP_SEPAR_PARA)) + ret = 1; + break; + case XML_REGEXP_SYMBOL: + if ((range2->type == XML_REGEXP_SYMBOL_MATH) || + (range2->type == XML_REGEXP_SYMBOL_CURRENCY) || + (range2->type == XML_REGEXP_SYMBOL_MODIFIER) || + (range2->type == XML_REGEXP_SYMBOL_OTHERS)) + ret = 1; + break; + case XML_REGEXP_OTHER: + if ((range2->type == XML_REGEXP_OTHER_CONTROL) || + (range2->type == XML_REGEXP_OTHER_FORMAT) || + (range2->type == XML_REGEXP_OTHER_PRIVATE)) + ret = 1; + break; + default: + if ((range2->type >= XML_REGEXP_LETTER) && + (range2->type < XML_REGEXP_BLOCK_NAME)) + ret = 0; + else { + /* safety net ! */ + return(1); + } + } + } + if (((range1->neg == 0) && (range2->neg != 0)) || + ((range1->neg != 0) && (range2->neg == 0))) + ret = !ret; + return(ret); +} + +/** + * xmlFACompareAtomTypes: + * @type1: an atom type + * @type2: an atom type + * + * Compares two atoms type to check whether they intersect in some ways, + * this is used by xmlFACompareAtoms only + * + * Returns 1 if they may intersect and 0 otherwise + */ +static int +xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) { + if ((type1 == XML_REGEXP_EPSILON) || + (type1 == XML_REGEXP_CHARVAL) || + (type1 == XML_REGEXP_RANGES) || + (type1 == XML_REGEXP_SUBREG) || + (type1 == XML_REGEXP_STRING) || + (type1 == XML_REGEXP_ANYCHAR)) + return(1); + if ((type2 == XML_REGEXP_EPSILON) || + (type2 == XML_REGEXP_CHARVAL) || + (type2 == XML_REGEXP_RANGES) || + (type2 == XML_REGEXP_SUBREG) || + (type2 == XML_REGEXP_STRING) || + (type2 == XML_REGEXP_ANYCHAR)) + return(1); + + if (type1 == type2) return(1); + + /* simplify subsequent compares by making sure type1 < type2 */ + if (type1 > type2) { + xmlRegAtomType tmp = type1; + type1 = type2; + type2 = tmp; + } + switch (type1) { + case XML_REGEXP_ANYSPACE: /* \s */ + /* can't be a letter, number, mark, pontuation, symbol */ + if ((type2 == XML_REGEXP_NOTSPACE) || + ((type2 >= XML_REGEXP_LETTER) && + (type2 <= XML_REGEXP_LETTER_OTHERS)) || + ((type2 >= XML_REGEXP_NUMBER) && + (type2 <= XML_REGEXP_NUMBER_OTHERS)) || + ((type2 >= XML_REGEXP_MARK) && + (type2 <= XML_REGEXP_MARK_ENCLOSING)) || + ((type2 >= XML_REGEXP_PUNCT) && + (type2 <= XML_REGEXP_PUNCT_OTHERS)) || + ((type2 >= XML_REGEXP_SYMBOL) && + (type2 <= XML_REGEXP_SYMBOL_OTHERS)) + ) return(0); + break; + case XML_REGEXP_NOTSPACE: /* \S */ + break; + case XML_REGEXP_INITNAME: /* \l */ + /* can't be a number, mark, separator, pontuation, symbol or other */ + if ((type2 == XML_REGEXP_NOTINITNAME) || + ((type2 >= XML_REGEXP_NUMBER) && + (type2 <= XML_REGEXP_NUMBER_OTHERS)) || + ((type2 >= XML_REGEXP_MARK) && + (type2 <= XML_REGEXP_MARK_ENCLOSING)) || + ((type2 >= XML_REGEXP_SEPAR) && + (type2 <= XML_REGEXP_SEPAR_PARA)) || + ((type2 >= XML_REGEXP_PUNCT) && + (type2 <= XML_REGEXP_PUNCT_OTHERS)) || + ((type2 >= XML_REGEXP_SYMBOL) && + (type2 <= XML_REGEXP_SYMBOL_OTHERS)) || + ((type2 >= XML_REGEXP_OTHER) && + (type2 <= XML_REGEXP_OTHER_NA)) + ) return(0); + break; + case XML_REGEXP_NOTINITNAME: /* \L */ + break; + case XML_REGEXP_NAMECHAR: /* \c */ + /* can't be a mark, separator, pontuation, symbol or other */ + if ((type2 == XML_REGEXP_NOTNAMECHAR) || + ((type2 >= XML_REGEXP_MARK) && + (type2 <= XML_REGEXP_MARK_ENCLOSING)) || + ((type2 >= XML_REGEXP_PUNCT) && + (type2 <= XML_REGEXP_PUNCT_OTHERS)) || + ((type2 >= XML_REGEXP_SEPAR) && + (type2 <= XML_REGEXP_SEPAR_PARA)) || + ((type2 >= XML_REGEXP_SYMBOL) && + (type2 <= XML_REGEXP_SYMBOL_OTHERS)) || + ((type2 >= XML_REGEXP_OTHER) && + (type2 <= XML_REGEXP_OTHER_NA)) + ) return(0); + break; + case XML_REGEXP_NOTNAMECHAR: /* \C */ + break; + case XML_REGEXP_DECIMAL: /* \d */ + /* can't be a letter, mark, separator, pontuation, symbol or other */ + if ((type2 == XML_REGEXP_NOTDECIMAL) || + (type2 == XML_REGEXP_REALCHAR) || + ((type2 >= XML_REGEXP_LETTER) && + (type2 <= XML_REGEXP_LETTER_OTHERS)) || + ((type2 >= XML_REGEXP_MARK) && + (type2 <= XML_REGEXP_MARK_ENCLOSING)) || + ((type2 >= XML_REGEXP_PUNCT) && + (type2 <= XML_REGEXP_PUNCT_OTHERS)) || + ((type2 >= XML_REGEXP_SEPAR) && + (type2 <= XML_REGEXP_SEPAR_PARA)) || + ((type2 >= XML_REGEXP_SYMBOL) && + (type2 <= XML_REGEXP_SYMBOL_OTHERS)) || + ((type2 >= XML_REGEXP_OTHER) && + (type2 <= XML_REGEXP_OTHER_NA)) + )return(0); + break; + case XML_REGEXP_NOTDECIMAL: /* \D */ + break; + case XML_REGEXP_REALCHAR: /* \w */ + /* can't be a mark, separator, pontuation, symbol or other */ + if ((type2 == XML_REGEXP_NOTDECIMAL) || + ((type2 >= XML_REGEXP_MARK) && + (type2 <= XML_REGEXP_MARK_ENCLOSING)) || + ((type2 >= XML_REGEXP_PUNCT) && + (type2 <= XML_REGEXP_PUNCT_OTHERS)) || + ((type2 >= XML_REGEXP_SEPAR) && + (type2 <= XML_REGEXP_SEPAR_PARA)) || + ((type2 >= XML_REGEXP_SYMBOL) && + (type2 <= XML_REGEXP_SYMBOL_OTHERS)) || + ((type2 >= XML_REGEXP_OTHER) && + (type2 <= XML_REGEXP_OTHER_NA)) + )return(0); + break; + case XML_REGEXP_NOTREALCHAR: /* \W */ + break; + /* + * at that point we know both type 1 and type2 are from + * character categories are ordered and are different, + * it becomes simple because this is a partition + */ + case XML_REGEXP_LETTER: + if (type2 <= XML_REGEXP_LETTER_OTHERS) + return(1); + return(0); + case XML_REGEXP_LETTER_UPPERCASE: + case XML_REGEXP_LETTER_LOWERCASE: + case XML_REGEXP_LETTER_TITLECASE: + case XML_REGEXP_LETTER_MODIFIER: + case XML_REGEXP_LETTER_OTHERS: + return(0); + case XML_REGEXP_MARK: + if (type2 <= XML_REGEXP_MARK_ENCLOSING) + return(1); + return(0); + case XML_REGEXP_MARK_NONSPACING: + case XML_REGEXP_MARK_SPACECOMBINING: + case XML_REGEXP_MARK_ENCLOSING: + return(0); + case XML_REGEXP_NUMBER: + if (type2 <= XML_REGEXP_NUMBER_OTHERS) + return(1); + return(0); + case XML_REGEXP_NUMBER_DECIMAL: + case XML_REGEXP_NUMBER_LETTER: + case XML_REGEXP_NUMBER_OTHERS: + return(0); + case XML_REGEXP_PUNCT: + if (type2 <= XML_REGEXP_PUNCT_OTHERS) + return(1); + return(0); + case XML_REGEXP_PUNCT_CONNECTOR: + case XML_REGEXP_PUNCT_DASH: + case XML_REGEXP_PUNCT_OPEN: + case XML_REGEXP_PUNCT_CLOSE: + case XML_REGEXP_PUNCT_INITQUOTE: + case XML_REGEXP_PUNCT_FINQUOTE: + case XML_REGEXP_PUNCT_OTHERS: + return(0); + case XML_REGEXP_SEPAR: + if (type2 <= XML_REGEXP_SEPAR_PARA) + return(1); + return(0); + case XML_REGEXP_SEPAR_SPACE: + case XML_REGEXP_SEPAR_LINE: + case XML_REGEXP_SEPAR_PARA: + return(0); + case XML_REGEXP_SYMBOL: + if (type2 <= XML_REGEXP_SYMBOL_OTHERS) + return(1); + return(0); + case XML_REGEXP_SYMBOL_MATH: + case XML_REGEXP_SYMBOL_CURRENCY: + case XML_REGEXP_SYMBOL_MODIFIER: + case XML_REGEXP_SYMBOL_OTHERS: + return(0); + case XML_REGEXP_OTHER: + if (type2 <= XML_REGEXP_OTHER_NA) + return(1); + return(0); + case XML_REGEXP_OTHER_CONTROL: + case XML_REGEXP_OTHER_FORMAT: + case XML_REGEXP_OTHER_PRIVATE: + case XML_REGEXP_OTHER_NA: + return(0); + default: + break; + } + return(1); +} + +/** + * xmlFAEqualAtoms: + * @atom1: an atom + * @atom2: an atom + * @deep: if not set only compare string pointers + * + * Compares two atoms to check whether they are the same exactly + * this is used to remove equivalent transitions + * + * Returns 1 if same and 0 otherwise + */ +static int +xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) { + int ret = 0; + + if (atom1 == atom2) + return(1); + if ((atom1 == NULL) || (atom2 == NULL)) + return(0); + + if (atom1->type != atom2->type) + return(0); + switch (atom1->type) { + case XML_REGEXP_EPSILON: + ret = 0; + break; + case XML_REGEXP_STRING: + if (!deep) + ret = (atom1->valuep == atom2->valuep); + else + ret = xmlStrEqual((xmlChar *)atom1->valuep, + (xmlChar *)atom2->valuep); + break; + case XML_REGEXP_CHARVAL: + ret = (atom1->codepoint == atom2->codepoint); + break; + case XML_REGEXP_RANGES: + /* too hard to do in the general case */ + ret = 0; + default: + break; + } + return(ret); +} + +/** + * xmlFACompareAtoms: + * @atom1: an atom + * @atom2: an atom + * @deep: if not set only compare string pointers + * + * Compares two atoms to check whether they intersect in some ways, + * this is used by xmlFAComputesDeterminism and xmlFARecurseDeterminism only + * + * Returns 1 if yes and 0 otherwise + */ +static int +xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) { + int ret = 1; + + if (atom1 == atom2) + return(1); + if ((atom1 == NULL) || (atom2 == NULL)) + return(0); + + if ((atom1->type == XML_REGEXP_ANYCHAR) || + (atom2->type == XML_REGEXP_ANYCHAR)) + return(1); + + if (atom1->type > atom2->type) { + xmlRegAtomPtr tmp; + tmp = atom1; + atom1 = atom2; + atom2 = tmp; + } + if (atom1->type != atom2->type) { + ret = xmlFACompareAtomTypes(atom1->type, atom2->type); + /* if they can't intersect at the type level break now */ + if (ret == 0) + return(0); + } + switch (atom1->type) { + case XML_REGEXP_STRING: + if (!deep) + ret = (atom1->valuep != atom2->valuep); + else + ret = xmlRegStrEqualWildcard((xmlChar *)atom1->valuep, + (xmlChar *)atom2->valuep); + break; + case XML_REGEXP_EPSILON: + goto not_determinist; + case XML_REGEXP_CHARVAL: + if (atom2->type == XML_REGEXP_CHARVAL) { + ret = (atom1->codepoint == atom2->codepoint); + } else { + ret = xmlRegCheckCharacter(atom2, atom1->codepoint); + if (ret < 0) + ret = 1; + } + break; + case XML_REGEXP_RANGES: + if (atom2->type == XML_REGEXP_RANGES) { + int i, j, res; + xmlRegRangePtr r1, r2; + + /* + * need to check that none of the ranges eventually matches + */ + for (i = 0;i < atom1->nbRanges;i++) { + for (j = 0;j < atom2->nbRanges;j++) { + r1 = atom1->ranges[i]; + r2 = atom2->ranges[j]; + res = xmlFACompareRanges(r1, r2); + if (res == 1) { + ret = 1; + goto done; + } + } + } + ret = 0; + } + break; + default: + goto not_determinist; + } +done: + if (atom1->neg != atom2->neg) { + ret = !ret; + } + if (ret == 0) + return(0); +not_determinist: + return(1); +} + +/** + * xmlFARecurseDeterminism: + * @ctxt: a regexp parser context + * + * Check whether the associated regexp is determinist, + * should be called after xmlFAEliminateEpsilonTransitions() + * + */ +static int +xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, + int to, xmlRegAtomPtr atom) { + int ret = 1; + int res; + int transnr, nbTrans; + xmlRegTransPtr t1; + int deep = 1; + + if (state == NULL) + return(ret); + + if (ctxt->flags & AM_AUTOMATA_RNG) + deep = 0; + + /* + * don't recurse on transitions potentially added in the course of + * the elimination. + */ + nbTrans = state->nbTrans; + for (transnr = 0;transnr < nbTrans;transnr++) { + t1 = &(state->trans[transnr]); + /* + * check transitions conflicting with the one looked at + */ + if (t1->atom == NULL) { + if (t1->to < 0) + continue; + res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to], + to, atom); + if (res == 0) { + ret = 0; + /* t1->nd = 1; */ + } + continue; + } + if (t1->to != to) + continue; + if (xmlFACompareAtoms(t1->atom, atom, deep)) { + ret = 0; + /* mark the transition as non-deterministic */ + t1->nd = 1; + } + } + return(ret); +} + +/** + * xmlFAComputesDeterminism: + * @ctxt: a regexp parser context + * + * Check whether the associated regexp is determinist, + * should be called after xmlFAEliminateEpsilonTransitions() + * + */ +static int +xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) { + int statenr, transnr; + xmlRegStatePtr state; + xmlRegTransPtr t1, t2, last; + int i; + int ret = 1; + int deep = 1; + +#ifdef DEBUG_REGEXP_GRAPH + printf("xmlFAComputesDeterminism\n"); + xmlRegPrintCtxt(stdout, ctxt); +#endif + if (ctxt->determinist != -1) + return(ctxt->determinist); + + if (ctxt->flags & AM_AUTOMATA_RNG) + deep = 0; + + /* + * First cleanup the automata removing cancelled transitions + */ + for (statenr = 0;statenr < ctxt->nbStates;statenr++) { + state = ctxt->states[statenr]; + if (state == NULL) + continue; + if (state->nbTrans < 2) + continue; + for (transnr = 0;transnr < state->nbTrans;transnr++) { + t1 = &(state->trans[transnr]); + /* + * Determinism checks in case of counted or all transitions + * will have to be handled separately + */ + if (t1->atom == NULL) { + /* t1->nd = 1; */ + continue; + } + if (t1->to == -1) /* eliminated */ + continue; + for (i = 0;i < transnr;i++) { + t2 = &(state->trans[i]); + if (t2->to == -1) /* eliminated */ + continue; + if (t2->atom != NULL) { + if (t1->to == t2->to) { + /* + * Here we use deep because we want to keep the + * transitions which indicate a conflict + */ + if (xmlFAEqualAtoms(t1->atom, t2->atom, deep) && + (t1->counter == t2->counter) && + (t1->count == t2->count)) + t2->to = -1; /* eliminated */ + } + } + } + } + } + + /* + * Check for all states that there aren't 2 transitions + * with the same atom and a different target. + */ + for (statenr = 0;statenr < ctxt->nbStates;statenr++) { + state = ctxt->states[statenr]; + if (state == NULL) + continue; + if (state->nbTrans < 2) + continue; + last = NULL; + for (transnr = 0;transnr < state->nbTrans;transnr++) { + t1 = &(state->trans[transnr]); + /* + * Determinism checks in case of counted or all transitions + * will have to be handled separately + */ + if (t1->atom == NULL) { + continue; + } + if (t1->to == -1) /* eliminated */ + continue; + for (i = 0;i < transnr;i++) { + t2 = &(state->trans[i]); + if (t2->to == -1) /* eliminated */ + continue; + if (t2->atom != NULL) { + /* + * But here we don't use deep because we want to + * find transitions which indicate a conflict + */ + if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) { + ret = 0; + /* mark the transitions as non-deterministic ones */ + t1->nd = 1; + t2->nd = 1; + last = t1; + } + } else if (t1->to != -1) { + /* + * do the closure in case of remaining specific + * epsilon transitions like choices or all + */ + ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to], + t2->to, t2->atom); + /* don't shortcut the computation so all non deterministic + transition get marked down + if (ret == 0) + return(0); + */ + if (ret == 0) { + t1->nd = 1; + /* t2->nd = 1; */ + last = t1; + } + } + } + /* don't shortcut the computation so all non deterministic + transition get marked down + if (ret == 0) + break; */ + } + + /* + * mark specifically the last non-deterministic transition + * from a state since there is no need to set-up rollback + * from it + */ + if (last != NULL) { + last->nd = 2; + } + + /* don't shortcut the computation so all non deterministic + transition get marked down + if (ret == 0) + break; */ + } + + ctxt->determinist = ret; + return(ret); +} + +/************************************************************************ + * * + * Routines to check input against transition atoms * + * * + ************************************************************************/ + +static int +xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg, + int start, int end, const xmlChar *blockName) { + int ret = 0; + + switch (type) { + case XML_REGEXP_STRING: + case XML_REGEXP_SUBREG: + case XML_REGEXP_RANGES: + case XML_REGEXP_EPSILON: + return(-1); + case XML_REGEXP_ANYCHAR: + ret = ((codepoint != '\n') && (codepoint != '\r')); + break; + case XML_REGEXP_CHARVAL: + ret = ((codepoint >= start) && (codepoint <= end)); + break; + case XML_REGEXP_NOTSPACE: + neg = !neg; + case XML_REGEXP_ANYSPACE: + ret = ((codepoint == '\n') || (codepoint == '\r') || + (codepoint == '\t') || (codepoint == ' ')); + break; + case XML_REGEXP_NOTINITNAME: + neg = !neg; + case XML_REGEXP_INITNAME: + ret = (xmlIsLetter(codepoint) || + (codepoint == '_') || (codepoint == ':')); + break; + case XML_REGEXP_NOTNAMECHAR: + neg = !neg; + case XML_REGEXP_NAMECHAR: + ret = (xmlIsLetter(codepoint) || xmlIsDigit(codepoint) || + (codepoint == '.') || (codepoint == '-') || + (codepoint == '_') || (codepoint == ':') || + xmlIsCombining(codepoint) || xmlIsExtender(codepoint)); + break; + case XML_REGEXP_NOTDECIMAL: + neg = !neg; + case XML_REGEXP_DECIMAL: + ret = xmlUCSIsCatNd(codepoint); + break; + case XML_REGEXP_REALCHAR: + neg = !neg; + case XML_REGEXP_NOTREALCHAR: + ret = xmlUCSIsCatP(codepoint); + if (ret == 0) + ret = xmlUCSIsCatZ(codepoint); + if (ret == 0) + ret = xmlUCSIsCatC(codepoint); + break; + case XML_REGEXP_LETTER: + ret = xmlUCSIsCatL(codepoint); + break; + case XML_REGEXP_LETTER_UPPERCASE: + ret = xmlUCSIsCatLu(codepoint); + break; + case XML_REGEXP_LETTER_LOWERCASE: + ret = xmlUCSIsCatLl(codepoint); + break; + case XML_REGEXP_LETTER_TITLECASE: + ret = xmlUCSIsCatLt(codepoint); + break; + case XML_REGEXP_LETTER_MODIFIER: + ret = xmlUCSIsCatLm(codepoint); + break; + case XML_REGEXP_LETTER_OTHERS: + ret = xmlUCSIsCatLo(codepoint); + break; + case XML_REGEXP_MARK: + ret = xmlUCSIsCatM(codepoint); + break; + case XML_REGEXP_MARK_NONSPACING: + ret = xmlUCSIsCatMn(codepoint); + break; + case XML_REGEXP_MARK_SPACECOMBINING: + ret = xmlUCSIsCatMc(codepoint); + break; + case XML_REGEXP_MARK_ENCLOSING: + ret = xmlUCSIsCatMe(codepoint); + break; + case XML_REGEXP_NUMBER: + ret = xmlUCSIsCatN(codepoint); + break; + case XML_REGEXP_NUMBER_DECIMAL: + ret = xmlUCSIsCatNd(codepoint); + break; + case XML_REGEXP_NUMBER_LETTER: + ret = xmlUCSIsCatNl(codepoint); + break; + case XML_REGEXP_NUMBER_OTHERS: + ret = xmlUCSIsCatNo(codepoint); + break; + case XML_REGEXP_PUNCT: + ret = xmlUCSIsCatP(codepoint); + break; + case XML_REGEXP_PUNCT_CONNECTOR: + ret = xmlUCSIsCatPc(codepoint); + break; + case XML_REGEXP_PUNCT_DASH: + ret = xmlUCSIsCatPd(codepoint); + break; + case XML_REGEXP_PUNCT_OPEN: + ret = xmlUCSIsCatPs(codepoint); + break; + case XML_REGEXP_PUNCT_CLOSE: + ret = xmlUCSIsCatPe(codepoint); + break; + case XML_REGEXP_PUNCT_INITQUOTE: + ret = xmlUCSIsCatPi(codepoint); + break; + case XML_REGEXP_PUNCT_FINQUOTE: + ret = xmlUCSIsCatPf(codepoint); + break; + case XML_REGEXP_PUNCT_OTHERS: + ret = xmlUCSIsCatPo(codepoint); + break; + case XML_REGEXP_SEPAR: + ret = xmlUCSIsCatZ(codepoint); + break; + case XML_REGEXP_SEPAR_SPACE: + ret = xmlUCSIsCatZs(codepoint); + break; + case XML_REGEXP_SEPAR_LINE: + ret = xmlUCSIsCatZl(codepoint); + break; + case XML_REGEXP_SEPAR_PARA: + ret = xmlUCSIsCatZp(codepoint); + break; + case XML_REGEXP_SYMBOL: + ret = xmlUCSIsCatS(codepoint); + break; + case XML_REGEXP_SYMBOL_MATH: + ret = xmlUCSIsCatSm(codepoint); + break; + case XML_REGEXP_SYMBOL_CURRENCY: + ret = xmlUCSIsCatSc(codepoint); + break; + case XML_REGEXP_SYMBOL_MODIFIER: + ret = xmlUCSIsCatSk(codepoint); + break; + case XML_REGEXP_SYMBOL_OTHERS: + ret = xmlUCSIsCatSo(codepoint); + break; + case XML_REGEXP_OTHER: + ret = xmlUCSIsCatC(codepoint); + break; + case XML_REGEXP_OTHER_CONTROL: + ret = xmlUCSIsCatCc(codepoint); + break; + case XML_REGEXP_OTHER_FORMAT: + ret = xmlUCSIsCatCf(codepoint); + break; + case XML_REGEXP_OTHER_PRIVATE: + ret = xmlUCSIsCatCo(codepoint); + break; + case XML_REGEXP_OTHER_NA: + /* ret = xmlUCSIsCatCn(codepoint); */ + /* Seems it doesn't exist anymore in recent Unicode releases */ + ret = 0; + break; + case XML_REGEXP_BLOCK_NAME: + ret = xmlUCSIsBlock(codepoint, (const char *) blockName); + break; + } + if (neg) + return(!ret); + return(ret); +} + +static int +xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) { + int i, ret = 0; + xmlRegRangePtr range; + + if ((atom == NULL) || (!xmlIsChar(codepoint))) + return(-1); + + switch (atom->type) { + case XML_REGEXP_SUBREG: + case XML_REGEXP_EPSILON: + return(-1); + case XML_REGEXP_CHARVAL: + return(codepoint == atom->codepoint); + case XML_REGEXP_RANGES: { + int accept = 0; + + for (i = 0;i < atom->nbRanges;i++) { + range = atom->ranges[i]; + if (range->neg == 2) { + ret = xmlRegCheckCharacterRange(range->type, codepoint, + 0, range->start, range->end, + range->blockName); + if (ret != 0) + return(0); /* excluded char */ + } else if (range->neg) { + ret = xmlRegCheckCharacterRange(range->type, codepoint, + 0, range->start, range->end, + range->blockName); + if (ret == 0) + accept = 1; + else + return(0); + } else { + ret = xmlRegCheckCharacterRange(range->type, codepoint, + 0, range->start, range->end, + range->blockName); + if (ret != 0) + accept = 1; /* might still be excluded */ + } + } + return(accept); + } + case XML_REGEXP_STRING: + /*TODO XML_REGEXP_STRING */ + return(-1); + case XML_REGEXP_ANYCHAR: + case XML_REGEXP_ANYSPACE: + case XML_REGEXP_NOTSPACE: + case XML_REGEXP_INITNAME: + case XML_REGEXP_NOTINITNAME: + case XML_REGEXP_NAMECHAR: + case XML_REGEXP_NOTNAMECHAR: + case XML_REGEXP_DECIMAL: + case XML_REGEXP_NOTDECIMAL: + case XML_REGEXP_REALCHAR: + case XML_REGEXP_NOTREALCHAR: + case XML_REGEXP_LETTER: + case XML_REGEXP_LETTER_UPPERCASE: + case XML_REGEXP_LETTER_LOWERCASE: + case XML_REGEXP_LETTER_TITLECASE: + case XML_REGEXP_LETTER_MODIFIER: + case XML_REGEXP_LETTER_OTHERS: + case XML_REGEXP_MARK: + case XML_REGEXP_MARK_NONSPACING: + case XML_REGEXP_MARK_SPACECOMBINING: + case XML_REGEXP_MARK_ENCLOSING: + case XML_REGEXP_NUMBER: + case XML_REGEXP_NUMBER_DECIMAL: + case XML_REGEXP_NUMBER_LETTER: + case XML_REGEXP_NUMBER_OTHERS: + case XML_REGEXP_PUNCT: + case XML_REGEXP_PUNCT_CONNECTOR: + case XML_REGEXP_PUNCT_DASH: + case XML_REGEXP_PUNCT_OPEN: + case XML_REGEXP_PUNCT_CLOSE: + case XML_REGEXP_PUNCT_INITQUOTE: + case XML_REGEXP_PUNCT_FINQUOTE: + case XML_REGEXP_PUNCT_OTHERS: + case XML_REGEXP_SEPAR: + case XML_REGEXP_SEPAR_SPACE: + case XML_REGEXP_SEPAR_LINE: + case XML_REGEXP_SEPAR_PARA: + case XML_REGEXP_SYMBOL: + case XML_REGEXP_SYMBOL_MATH: + case XML_REGEXP_SYMBOL_CURRENCY: + case XML_REGEXP_SYMBOL_MODIFIER: + case XML_REGEXP_SYMBOL_OTHERS: + case XML_REGEXP_OTHER: + case XML_REGEXP_OTHER_CONTROL: + case XML_REGEXP_OTHER_FORMAT: + case XML_REGEXP_OTHER_PRIVATE: + case XML_REGEXP_OTHER_NA: + case XML_REGEXP_BLOCK_NAME: + ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0, + (const xmlChar *)atom->valuep); + if (atom->neg) + ret = !ret; + break; + } + return(ret); +} + +/************************************************************************ + * * + * Saving and restoring state of an execution context * + * * + ************************************************************************/ + +#ifdef DEBUG_REGEXP_EXEC +static void +xmlFARegDebugExec(xmlRegExecCtxtPtr exec) { + printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index); + if (exec->inputStack != NULL) { + int i; + printf(": "); + for (i = 0;(i < 3) && (i < exec->inputStackNr);i++) + printf("%s ", (const char *) + exec->inputStack[exec->inputStackNr - (i + 1)].value); + } else { + printf(": %s", &(exec->inputString[exec->index])); + } + printf("\n"); +} +#endif + +static void +xmlFARegExecSave(xmlRegExecCtxtPtr exec) { +#ifdef DEBUG_REGEXP_EXEC + printf("saving "); + exec->transno++; + xmlFARegDebugExec(exec); + exec->transno--; +#endif +#ifdef MAX_PUSH + if (exec->nbPush > MAX_PUSH) { + return; + } + exec->nbPush++; +#endif + + if (exec->maxRollbacks == 0) { + exec->maxRollbacks = 4; + exec->rollbacks = (xmlRegExecRollback *) REGEXP_MALLOC(exec->maxRollbacks * + sizeof(xmlRegExecRollback)); + if (exec->rollbacks == NULL) { + xmlRegexpErrMemory(NULL, "saving regexp"); + exec->maxRollbacks = 0; + return; + } + memset(exec->rollbacks, 0, + exec->maxRollbacks * sizeof(xmlRegExecRollback)); + } else if (exec->nbRollbacks >= exec->maxRollbacks) { + xmlRegExecRollback *tmp; + int len = exec->maxRollbacks; + + exec->maxRollbacks *= 2; + tmp = (xmlRegExecRollback *) REGEXP_REALLOC(exec->rollbacks, + exec->maxRollbacks * sizeof(xmlRegExecRollback)); + if (tmp == NULL) { + xmlRegexpErrMemory(NULL, "saving regexp"); + exec->maxRollbacks /= 2; + return; + } + exec->rollbacks = tmp; + tmp = &exec->rollbacks[len]; + memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback)); + } + exec->rollbacks[exec->nbRollbacks].state = exec->state; + exec->rollbacks[exec->nbRollbacks].index = exec->index; + exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1; + if (exec->comp->nbCounters > 0) { + if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { + exec->rollbacks[exec->nbRollbacks].counts = (int *) + REGEXP_MALLOC(exec->comp->nbCounters * sizeof(int)); + if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { + xmlRegexpErrMemory(NULL, "saving regexp"); + exec->status = -5; + return; + } + } + memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts, + exec->comp->nbCounters * sizeof(int)); + } + exec->nbRollbacks++; +} + +static void +xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) { + if (exec->nbRollbacks <= 0) { + exec->status = -1; +#ifdef DEBUG_REGEXP_EXEC + printf("rollback failed on empty stack\n"); +#endif + return; + } + exec->nbRollbacks--; + exec->state = exec->rollbacks[exec->nbRollbacks].state; + exec->index = exec->rollbacks[exec->nbRollbacks].index; + exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch; + if (exec->comp->nbCounters > 0) { + if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { +#ifdef DEBUG_REGEXP_EXEC + fprintf(stderr, "exec save: allocation failed"); +#endif + exec->status = -6; + return; + } + memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts, + exec->comp->nbCounters * sizeof(int)); + } + +#ifdef DEBUG_REGEXP_EXEC + printf("restored "); + xmlFARegDebugExec(exec); +#endif +} + +/************************************************************************ + * * + * Verifier, running an input against a compiled regexp * + * * + ************************************************************************/ + +static int +xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) { + xmlRegExecCtxt execval; + xmlRegExecCtxtPtr exec = &execval; + int ret, codepoint = 0, len, deter; + + exec->inputString = content; + exec->index = 0; + exec->nbPush = 0; + exec->determinist = 1; + exec->maxRollbacks = 0; + exec->nbRollbacks = 0; + exec->rollbacks = NULL; + exec->status = 0; + exec->comp = comp; + exec->state = comp->states[0]; + exec->transno = 0; + exec->transcount = 0; + exec->inputStack = NULL; + exec->inputStackMax = 0; + if (comp->nbCounters > 0) { + exec->counts = (int *) REGEXP_MALLOC(comp->nbCounters * sizeof(int)); + if (exec->counts == NULL) { + xmlRegexpErrMemory(NULL, "running regexp"); + return(-1); + } + memset(exec->counts, 0, comp->nbCounters * sizeof(int)); + } else + exec->counts = NULL; + while ((exec->status == 0) && + ((exec->inputString[exec->index] != 0) || + ((exec->state != NULL) && + (exec->state->type != XML_REGEXP_FINAL_STATE)))) { + xmlRegTransPtr trans; + xmlRegAtomPtr atom; + + /* + * If end of input on non-terminal state, rollback, however we may + * still have epsilon like transition for counted transitions + * on counters, in that case don't break too early. Additionally, + * if we are working on a range like "AB{0,2}", where B is not present, + * we don't want to break. + */ + len = 1; + if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) { + /* + * if there is a transition, we must check if + * atom allows minOccurs of 0 + */ + if (exec->transno < exec->state->nbTrans) { + trans = &exec->state->trans[exec->transno]; + if (trans->to >=0) { + atom = trans->atom; + if (!((atom->min == 0) && (atom->max > 0))) + goto rollback; + } + } else + goto rollback; + } + + exec->transcount = 0; + for (;exec->transno < exec->state->nbTrans;exec->transno++) { + trans = &exec->state->trans[exec->transno]; + if (trans->to < 0) + continue; + atom = trans->atom; + ret = 0; + deter = 1; + if (trans->count >= 0) { + int count; + xmlRegCounterPtr counter; + + if (exec->counts == NULL) { + exec->status = -1; + goto error; + } + /* + * A counted transition. + */ + + count = exec->counts[trans->count]; + counter = &exec->comp->counters[trans->count]; +#ifdef DEBUG_REGEXP_EXEC + printf("testing count %d: val %d, min %d, max %d\n", + trans->count, count, counter->min, counter->max); +#endif + ret = ((count >= counter->min) && (count <= counter->max)); + if ((ret) && (counter->min != counter->max)) + deter = 0; + } else if (atom == NULL) { +#ifdef DEBUG_REGEXP_EXEC + fprintf(stderr, "epsilon transition left at runtime\n"); +#endif + exec->status = -2; + break; + } else if (exec->inputString[exec->index] != 0) { + codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len); + ret = xmlRegCheckCharacter(atom, codepoint); + if ((ret == 1) && (atom->min >= 0) && (atom->max > 0)) { + xmlRegStatePtr to = comp->states[trans->to]; + + /* + * this is a multiple input sequence + * If there is a counter associated increment it now. + * before potentially saving and rollback + * do not increment if the counter is already over the + * maximum limit in which case get to next transition + */ + if (trans->counter >= 0) { + xmlRegCounterPtr counter; + + if ((exec->counts == NULL) || + (exec->comp == NULL) || + (exec->comp->counters == NULL)) { + exec->status = -1; + goto error; + } + counter = &exec->comp->counters[trans->counter]; + if (exec->counts[trans->counter] >= counter->max) + continue; /* for loop on transitions */ + +#ifdef DEBUG_REGEXP_EXEC + printf("Increasing count %d\n", trans->counter); +#endif + exec->counts[trans->counter]++; + } + if (exec->state->nbTrans > exec->transno + 1) { + xmlFARegExecSave(exec); + } + exec->transcount = 1; + do { + /* + * Try to progress as much as possible on the input + */ + if (exec->transcount == atom->max) { + break; + } + exec->index += len; + /* + * End of input: stop here + */ + if (exec->inputString[exec->index] == 0) { + exec->index -= len; + break; + } + if (exec->transcount >= atom->min) { + int transno = exec->transno; + xmlRegStatePtr state = exec->state; + + /* + * The transition is acceptable save it + */ + exec->transno = -1; /* trick */ + exec->state = to; + xmlFARegExecSave(exec); + exec->transno = transno; + exec->state = state; + } + codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), + len); + ret = xmlRegCheckCharacter(atom, codepoint); + exec->transcount++; + } while (ret == 1); + if (exec->transcount < atom->min) + ret = 0; + + /* + * If the last check failed but one transition was found + * possible, rollback + */ + if (ret < 0) + ret = 0; + if (ret == 0) { + goto rollback; + } + if (trans->counter >= 0) { + if (exec->counts == NULL) { + exec->status = -1; + goto error; + } +#ifdef DEBUG_REGEXP_EXEC + printf("Decreasing count %d\n", trans->counter); +#endif + exec->counts[trans->counter]--; + } + } else if ((ret == 0) && (atom->min == 0) && (atom->max > 0)) { + /* + * we don't match on the codepoint, but minOccurs of 0 + * says that's ok. Setting len to 0 inhibits stepping + * over the codepoint. + */ + exec->transcount = 1; + len = 0; + ret = 1; + } + } else if ((atom->min == 0) && (atom->max > 0)) { + /* another spot to match when minOccurs is 0 */ + exec->transcount = 1; + len = 0; + ret = 1; + } + if (ret == 1) { + if ((trans->nd == 1) || + ((trans->count >= 0) && (deter == 0) && + (exec->state->nbTrans > exec->transno + 1))) { +#ifdef DEBUG_REGEXP_EXEC + if (trans->nd == 1) + printf("Saving on nd transition atom %d for %c at %d\n", + trans->atom->no, codepoint, exec->index); + else + printf("Saving on counted transition count %d for %c at %d\n", + trans->count, codepoint, exec->index); +#endif + xmlFARegExecSave(exec); + } + if (trans->counter >= 0) { + xmlRegCounterPtr counter; + + /* make sure we don't go over the counter maximum value */ + if ((exec->counts == NULL) || + (exec->comp == NULL) || + (exec->comp->counters == NULL)) { + exec->status = -1; + goto error; + } + counter = &exec->comp->counters[trans->counter]; + if (exec->counts[trans->counter] >= counter->max) + continue; /* for loop on transitions */ +#ifdef DEBUG_REGEXP_EXEC + printf("Increasing count %d\n", trans->counter); +#endif + exec->counts[trans->counter]++; + } + if ((trans->count >= 0) && + (trans->count < REGEXP_ALL_COUNTER)) { + if (exec->counts == NULL) { + exec->status = -1; + goto error; + } +#ifdef DEBUG_REGEXP_EXEC + printf("resetting count %d on transition\n", + trans->count); +#endif + exec->counts[trans->count] = 0; + } +#ifdef DEBUG_REGEXP_EXEC + printf("entering state %d\n", trans->to); +#endif + exec->state = comp->states[trans->to]; + exec->transno = 0; + if (trans->atom != NULL) { + exec->index += len; + } + goto progress; + } else if (ret < 0) { + exec->status = -4; + break; + } + } + if ((exec->transno != 0) || (exec->state->nbTrans == 0)) { +rollback: + /* + * Failed to find a way out + */ + exec->determinist = 0; +#ifdef DEBUG_REGEXP_EXEC + printf("rollback from state %d on %d:%c\n", exec->state->no, + codepoint,codepoint); +#endif + xmlFARegExecRollBack(exec); + } +progress: + continue; + } +error: + if (exec->rollbacks != NULL) { + if (exec->counts != NULL) { + int i; + + for (i = 0;i < exec->maxRollbacks;i++) + if (exec->rollbacks[i].counts != NULL) + REGEXP_FREE(exec->rollbacks[i].counts); + } + REGEXP_FREE(exec->rollbacks); + } + if (exec->counts != NULL) + REGEXP_FREE(exec->counts); + if (exec->status == 0) + return(1); + if (exec->status == -1) { + if (exec->nbPush > MAX_PUSH) + return(-1); + return(0); + } + return(exec->status); +} + +/************************************************************************ + * * + * Progressive interface to the verifier one atom at a time * + * * + ************************************************************************/ + +/** + * xmlRegStrEqualWildcard: + * @expStr: the string to be evaluated + * @valStr: the validation string + * + * Checks if both strings are equal or have the same content. "*" + * can be used as a wildcard in @valStr; "|" is used as a seperator of + * substrings in both @expStr and @valStr. + * + * Returns 1 if the comparison is satisfied and the number of substrings + * is equal, 0 otherwise. + */ + +static int +xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr) { + if (expStr == valStr) return(1); + if (expStr == NULL) return(0); + if (valStr == NULL) return(0); + do { + /* + * Eval if we have a wildcard for the current item. + */ + if (*expStr != *valStr) { + /* if one of them starts with a wildcard make valStr be it */ + if (*valStr == '*') { + const xmlChar *tmp; + + tmp = valStr; + valStr = expStr; + expStr = tmp; + } + if ((*valStr != 0) && (*expStr != 0) && (*expStr++ == '*')) { + do { + if (*valStr == XML_REG_STRING_SEPARATOR) + break; + valStr++; + } while (*valStr != 0); + continue; + } else + return(0); + } + expStr++; + valStr++; + } while (*valStr != 0); + if (*expStr != 0) + return (0); + else + return (1); +} + +/************************************************************************ + * * + * Parser for the Schemas Datatype Regular Expressions * + * http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs * + * * + ************************************************************************/ + +/** + * xmlFAIsChar: + * @ctxt: a regexp parser context + * + * [10] Char ::= [^.\?*+()|#x5B#x5D] + */ +static int +xmlFAIsChar(xmlRegParserCtxtPtr ctxt) { + int cur; + int len; + + cur = CUR_SCHAR(ctxt->cur, len); + if ((cur == '.') || (cur == '\\') || (cur == '?') || + (cur == '*') || (cur == '+') || (cur == '(') || + (cur == ')') || (cur == '|') || (cur == 0x5B) || + (cur == 0x5D) || (cur == 0)) + return(-1); + return(cur); +} + +/** + * xmlFAParseCharProp: + * @ctxt: a regexp parser context + * + * [27] charProp ::= IsCategory | IsBlock + * [28] IsCategory ::= Letters | Marks | Numbers | Punctuation | + * Separators | Symbols | Others + * [29] Letters ::= 'L' [ultmo]? + * [30] Marks ::= 'M' [nce]? + * [31] Numbers ::= 'N' [dlo]? + * [32] Punctuation ::= 'P' [cdseifo]? + * [33] Separators ::= 'Z' [slp]? + * [34] Symbols ::= 'S' [mcko]? + * [35] Others ::= 'C' [cfon]? + * [36] IsBlock ::= 'Is' [a-zA-Z0-9#x2D]+ + */ +static void +xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) { + int cur; + xmlRegAtomType type = (xmlRegAtomType) 0; + xmlChar *blockName = NULL; + + cur = CUR; + if (cur == 'L') { + NEXT; + cur = CUR; + if (cur == 'u') { + NEXT; + type = XML_REGEXP_LETTER_UPPERCASE; + } else if (cur == 'l') { + NEXT; + type = XML_REGEXP_LETTER_LOWERCASE; + } else if (cur == 't') { + NEXT; + type = XML_REGEXP_LETTER_TITLECASE; + } else if (cur == 'm') { + NEXT; + type = XML_REGEXP_LETTER_MODIFIER; + } else if (cur == 'o') { + NEXT; + type = XML_REGEXP_LETTER_OTHERS; + } else { + type = XML_REGEXP_LETTER; + } + } else if (cur == 'M') { + NEXT; + cur = CUR; + if (cur == 'n') { + NEXT; + /* nonspacing */ + type = XML_REGEXP_MARK_NONSPACING; + } else if (cur == 'c') { + NEXT; + /* spacing combining */ + type = XML_REGEXP_MARK_SPACECOMBINING; + } else if (cur == 'e') { + NEXT; + /* enclosing */ + type = XML_REGEXP_MARK_ENCLOSING; + } else { + /* all marks */ + type = XML_REGEXP_MARK; + } + } else if (cur == 'N') { + NEXT; + cur = CUR; + if (cur == 'd') { + NEXT; + /* digital */ + type = XML_REGEXP_NUMBER_DECIMAL; + } else if (cur == 'l') { + NEXT; + /* letter */ + type = XML_REGEXP_NUMBER_LETTER; + } else if (cur == 'o') { + NEXT; + /* other */ + type = XML_REGEXP_NUMBER_OTHERS; + } else { + /* all numbers */ + type = XML_REGEXP_NUMBER; + } + } else if (cur == 'P') { + NEXT; + cur = CUR; + if (cur == 'c') { + NEXT; + /* connector */ + type = XML_REGEXP_PUNCT_CONNECTOR; + } else if (cur == 'd') { + NEXT; + /* dash */ + type = XML_REGEXP_PUNCT_DASH; + } else if (cur == 's') { + NEXT; + /* open */ + type = XML_REGEXP_PUNCT_OPEN; + } else if (cur == 'e') { + NEXT; + /* close */ + type = XML_REGEXP_PUNCT_CLOSE; + } else if (cur == 'i') { + NEXT; + /* initial quote */ + type = XML_REGEXP_PUNCT_INITQUOTE; + } else if (cur == 'f') { + NEXT; + /* final quote */ + type = XML_REGEXP_PUNCT_FINQUOTE; + } else if (cur == 'o') { + NEXT; + /* other */ + type = XML_REGEXP_PUNCT_OTHERS; + } else { + /* all punctuation */ + type = XML_REGEXP_PUNCT; + } + } else if (cur == 'Z') { + NEXT; + cur = CUR; + if (cur == 's') { + NEXT; + /* space */ + type = XML_REGEXP_SEPAR_SPACE; + } else if (cur == 'l') { + NEXT; + /* line */ + type = XML_REGEXP_SEPAR_LINE; + } else if (cur == 'p') { + NEXT; + /* paragraph */ + type = XML_REGEXP_SEPAR_PARA; + } else { + /* all separators */ + type = XML_REGEXP_SEPAR; + } + } else if (cur == 'S') { + NEXT; + cur = CUR; + if (cur == 'm') { + NEXT; + type = XML_REGEXP_SYMBOL_MATH; + /* math */ + } else if (cur == 'c') { + NEXT; + type = XML_REGEXP_SYMBOL_CURRENCY; + /* currency */ + } else if (cur == 'k') { + NEXT; + type = XML_REGEXP_SYMBOL_MODIFIER; + /* modifiers */ + } else if (cur == 'o') { + NEXT; + type = XML_REGEXP_SYMBOL_OTHERS; + /* other */ + } else { + /* all symbols */ + type = XML_REGEXP_SYMBOL; + } + } else if (cur == 'C') { + NEXT; + cur = CUR; + if (cur == 'c') { + NEXT; + /* control */ + type = XML_REGEXP_OTHER_CONTROL; + } else if (cur == 'f') { + NEXT; + /* format */ + type = XML_REGEXP_OTHER_FORMAT; + } else if (cur == 'o') { + NEXT; + /* private use */ + type = XML_REGEXP_OTHER_PRIVATE; + } else if (cur == 'n') { + NEXT; + /* not assigned */ + type = XML_REGEXP_OTHER_NA; + } else { + /* all others */ + type = XML_REGEXP_OTHER; + } + } else if (cur == 'I') { + const xmlChar *start; + NEXT; + cur = CUR; + if (cur != 's') { + REGEXP_ERROR("IsXXXX expected"); + return; + } + NEXT; + start = ctxt->cur; + cur = CUR; + if (((cur >= 'a') && (cur <= 'z')) || + ((cur >= 'A') && (cur <= 'Z')) || + ((cur >= '0') && (cur <= '9')) || + (cur == 0x2D)) { + NEXT; + cur = CUR; + while (((cur >= 'a') && (cur <= 'z')) || + ((cur >= 'A') && (cur <= 'Z')) || + ((cur >= '0') && (cur <= '9')) || + (cur == 0x2D)) { + NEXT; + cur = CUR; + } + } + type = XML_REGEXP_BLOCK_NAME; + blockName = xmlStrndup(start, ctxt->cur - start); + } else { + REGEXP_ERROR("Unknown char property"); + return; + } + if (ctxt->atom == NULL) { + ctxt->atom = xmlRegNewAtom(ctxt, type); + if (ctxt->atom != NULL) + ctxt->atom->valuep = blockName; + } else if (ctxt->atom->type == XML_REGEXP_RANGES) { + xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, + type, 0, 0, blockName); + } +} + +/** + * xmlFAParseCharClassEsc: + * @ctxt: a regexp parser context + * + * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc ) + * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E] + * [25] catEsc ::= '\p{' charProp '}' + * [26] complEsc ::= '\P{' charProp '}' + * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW]) + */ +static void +xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) { + int cur; + + if (CUR == '.') { + if (ctxt->atom == NULL) { + ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR); + } else if (ctxt->atom->type == XML_REGEXP_RANGES) { + xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, + XML_REGEXP_ANYCHAR, 0, 0, NULL); + } + NEXT; + return; + } + if (CUR != '\\') { + REGEXP_ERROR("Escaped sequence: expecting \\"); + return; + } + NEXT; + cur = CUR; + if (cur == 'p') { + NEXT; + if (CUR != '{') { + REGEXP_ERROR("Expecting '{'"); + return; + } + NEXT; + xmlFAParseCharProp(ctxt); + if (CUR != '}') { + REGEXP_ERROR("Expecting '}'"); + return; + } + NEXT; + } else if (cur == 'P') { + NEXT; + if (CUR != '{') { + REGEXP_ERROR("Expecting '{'"); + return; + } + NEXT; + xmlFAParseCharProp(ctxt); + ctxt->atom->neg = 1; + if (CUR != '}') { + REGEXP_ERROR("Expecting '}'"); + return; + } + NEXT; + } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') || + (cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') || + (cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') || + (cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) || + (cur == 0x5E)) { + if (ctxt->atom == NULL) { + ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL); + if (ctxt->atom != NULL) { + switch (cur) { + case 'n': + ctxt->atom->codepoint = '\n'; + break; + case 'r': + ctxt->atom->codepoint = '\r'; + break; + case 't': + ctxt->atom->codepoint = '\t'; + break; + default: + ctxt->atom->codepoint = cur; + } + } + } else if (ctxt->atom->type == XML_REGEXP_RANGES) { + switch (cur) { + case 'n': + cur = '\n'; + break; + case 'r': + cur = '\r'; + break; + case 't': + cur = '\t'; + break; + } + xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, + XML_REGEXP_CHARVAL, cur, cur, NULL); + } + NEXT; + } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') || + (cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') || + (cur == 'w') || (cur == 'W')) { + xmlRegAtomType type = XML_REGEXP_ANYSPACE; + + switch (cur) { + case 's': + type = XML_REGEXP_ANYSPACE; + break; + case 'S': + type = XML_REGEXP_NOTSPACE; + break; + case 'i': + type = XML_REGEXP_INITNAME; + break; + case 'I': + type = XML_REGEXP_NOTINITNAME; + break; + case 'c': + type = XML_REGEXP_NAMECHAR; + break; + case 'C': + type = XML_REGEXP_NOTNAMECHAR; + break; + case 'd': + type = XML_REGEXP_DECIMAL; + break; + case 'D': + type = XML_REGEXP_NOTDECIMAL; + break; + case 'w': + type = XML_REGEXP_REALCHAR; + break; + case 'W': + type = XML_REGEXP_NOTREALCHAR; + break; + } + NEXT; + if (ctxt->atom == NULL) { + ctxt->atom = xmlRegNewAtom(ctxt, type); + } else if (ctxt->atom->type == XML_REGEXP_RANGES) { + xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, + type, 0, 0, NULL); + } + } else { + REGEXP_ERROR("Wrong escape sequence, misuse of character '\\'"); + } +} + +/** + * xmlFAParseCharRange: + * @ctxt: a regexp parser context + * + * [17] charRange ::= seRange | XmlCharRef | XmlCharIncDash + * [18] seRange ::= charOrEsc '-' charOrEsc + * [20] charOrEsc ::= XmlChar | SingleCharEsc + * [21] XmlChar ::= [^\#x2D#x5B#x5D] + * [22] XmlCharIncDash ::= [^\#x5B#x5D] + */ +static void +xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) { + int cur, len; + int start = -1; + int end = -1; + + if (CUR == '\0') { + REGEXP_ERROR("Expecting ']'"); + return; + } + + cur = CUR; + if (cur == '\\') { + NEXT; + cur = CUR; + switch (cur) { + case 'n': start = 0xA; break; + case 'r': start = 0xD; break; + case 't': start = 0x9; break; + case '\\': case '|': case '.': case '-': case '^': case '?': + case '*': case '+': case '{': case '}': case '(': case ')': + case '[': case ']': + start = cur; break; + default: + REGEXP_ERROR("Invalid escape value"); + return; + } + end = start; + len = 1; + } else if ((cur != 0x5B) && (cur != 0x5D)) { + end = start = CUR_SCHAR(ctxt->cur, len); + } else { + REGEXP_ERROR("Expecting a char range"); + return; + } + /* + * Since we are "inside" a range, we can assume ctxt->cur is past + * the start of ctxt->string, and PREV should be safe + */ + if ((start == '-') && (NXT(1) != ']') && (PREV != '[') && (PREV != '^')) { + NEXTL(len); + return; + } + NEXTL(len); + cur = CUR; + if ((cur != '-') || (NXT(1) == ']')) { + xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, + XML_REGEXP_CHARVAL, start, end, NULL); + return; + } + NEXT; + cur = CUR; + if (cur == '\\') { + NEXT; + cur = CUR; + switch (cur) { + case 'n': end = 0xA; break; + case 'r': end = 0xD; break; + case 't': end = 0x9; break; + case '\\': case '|': case '.': case '-': case '^': case '?': + case '*': case '+': case '{': case '}': case '(': case ')': + case '[': case ']': + end = cur; break; + default: + REGEXP_ERROR("Invalid escape value"); + return; + } + len = 1; + } else if ((cur != 0x5B) && (cur != 0x5D)) { + end = CUR_SCHAR(ctxt->cur, len); + } else { + REGEXP_ERROR("Expecting the end of a char range"); + return; + } + NEXTL(len); + /* TODO check that the values are acceptable character ranges for XML */ + if (end < start) { + REGEXP_ERROR("End of range is before start of range"); + } else { + xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, + XML_REGEXP_CHARVAL, start, end, NULL); + } + return; +} + +/** + * xmlFAParsePosCharGroup: + * @ctxt: a regexp parser context + * + * [14] posCharGroup ::= ( charRange | charClassEsc )+ + */ +static void +xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) { + do { + if (CUR == '\\') { + xmlFAParseCharClassEsc(ctxt); + } else { + xmlFAParseCharRange(ctxt); + } + } while ((CUR != ']') && (CUR != '^') && (CUR != '-') && + (CUR != 0) && (ctxt->error == 0)); +} + +/** + * xmlFAParseCharGroup: + * @ctxt: a regexp parser context + * + * [13] charGroup ::= posCharGroup | negCharGroup | charClassSub + * [15] negCharGroup ::= '^' posCharGroup + * [16] charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr + * [12] charClassExpr ::= '[' charGroup ']' + */ +static void +xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) { + int n = ctxt->neg; + while ((CUR != ']') && (ctxt->error == 0)) { + if (CUR == '^') { + int neg = ctxt->neg; + + NEXT; + ctxt->neg = !ctxt->neg; + xmlFAParsePosCharGroup(ctxt); + ctxt->neg = neg; + } else if ((CUR == '-') && (NXT(1) == '[')) { + int neg = ctxt->neg; + ctxt->neg = 2; + NEXT; /* eat the '-' */ + NEXT; /* eat the '[' */ + xmlFAParseCharGroup(ctxt); + if (CUR == ']') { + NEXT; + } else { + REGEXP_ERROR("charClassExpr: ']' expected"); + break; + } + ctxt->neg = neg; + break; + } else if (CUR != ']') { + xmlFAParsePosCharGroup(ctxt); + } + } + ctxt->neg = n; +} + +/** + * xmlFAParseCharClass: + * @ctxt: a regexp parser context + * + * [11] charClass ::= charClassEsc | charClassExpr + * [12] charClassExpr ::= '[' charGroup ']' + */ +static void +xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) { + if (CUR == '[') { + NEXT; + ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES); + if (ctxt->atom == NULL) + return; + xmlFAParseCharGroup(ctxt); + if (CUR == ']') { + NEXT; + } else { + REGEXP_ERROR("xmlFAParseCharClass: ']' expected"); + } + } else { + xmlFAParseCharClassEsc(ctxt); + } +} + +/** + * xmlFAParseQuantExact: + * @ctxt: a regexp parser context + * + * [8] QuantExact ::= [0-9]+ + * + * Returns 0 if success or -1 in case of error + */ +static int +xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) { + int ret = 0; + int ok = 0; + + while ((CUR >= '0') && (CUR <= '9')) { + ret = ret * 10 + (CUR - '0'); + ok = 1; + NEXT; + } + if (ok != 1) { + return(-1); + } + return(ret); +} + +/** + * xmlFAParseQuantifier: + * @ctxt: a regexp parser context + * + * [4] quantifier ::= [?*+] | ( '{' quantity '}' ) + * [5] quantity ::= quantRange | quantMin | QuantExact + * [6] quantRange ::= QuantExact ',' QuantExact + * [7] quantMin ::= QuantExact ',' + * [8] QuantExact ::= [0-9]+ + */ +static int +xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) { + int cur; + + cur = CUR; + if ((cur == '?') || (cur == '*') || (cur == '+')) { + if (ctxt->atom != NULL) { + if (cur == '?') + ctxt->atom->quant = XML_REGEXP_QUANT_OPT; + else if (cur == '*') + ctxt->atom->quant = XML_REGEXP_QUANT_MULT; + else if (cur == '+') + ctxt->atom->quant = XML_REGEXP_QUANT_PLUS; + } + NEXT; + return(1); + } + if (cur == '{') { + int min = 0, max = 0; + + NEXT; + cur = xmlFAParseQuantExact(ctxt); + if (cur >= 0) + min = cur; + if (CUR == ',') { + NEXT; + if (CUR == '}') + max = INT_MAX; + else { + cur = xmlFAParseQuantExact(ctxt); + if (cur >= 0) + max = cur; + else { + REGEXP_ERROR("Improper quantifier"); + } + } + } + if (CUR == '}') { + NEXT; + } else { + REGEXP_ERROR("Unterminated quantifier"); + } + if (max == 0) + max = min; + if (ctxt->atom != NULL) { + ctxt->atom->quant = XML_REGEXP_QUANT_RANGE; + ctxt->atom->min = min; + ctxt->atom->max = max; + } + return(1); + } + return(0); +} + +/** + * xmlFAParseAtom: + * @ctxt: a regexp parser context + * + * [9] atom ::= Char | charClass | ( '(' regExp ')' ) + */ +static int +xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) { + int codepoint, len; + + codepoint = xmlFAIsChar(ctxt); + if (codepoint > 0) { + ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL); + if (ctxt->atom == NULL) + return(-1); + codepoint = CUR_SCHAR(ctxt->cur, len); + ctxt->atom->codepoint = codepoint; + NEXTL(len); + return(1); + } else if (CUR == '|') { + return(0); + } else if (CUR == 0) { + return(0); + } else if (CUR == ')') { + return(0); + } else if (CUR == '(') { + xmlRegStatePtr start, oldend, start0; + + NEXT; + /* + * this extra Epsilon transition is needed if we count with 0 allowed + * unfortunately this can't be known at that point + */ + xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL); + start0 = ctxt->state; + xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL); + start = ctxt->state; + oldend = ctxt->end; + ctxt->end = NULL; + ctxt->atom = NULL; + xmlFAParseRegExp(ctxt, 0); + if (CUR == ')') { + NEXT; + } else { + REGEXP_ERROR("xmlFAParseAtom: expecting ')'"); + } + ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG); + if (ctxt->atom == NULL) + return(-1); + ctxt->atom->start = start; + ctxt->atom->start0 = start0; + ctxt->atom->stop = ctxt->state; + ctxt->end = oldend; + return(1); + } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) { + xmlFAParseCharClass(ctxt); + return(1); + } + return(0); +} + +/** + * xmlFAParsePiece: + * @ctxt: a regexp parser context + * + * [3] piece ::= atom quantifier? + */ +static int +xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) { + int ret; + + ctxt->atom = NULL; + ret = xmlFAParseAtom(ctxt); + if (ret == 0) + return(0); + if (ctxt->atom == NULL) { + REGEXP_ERROR("internal: no atom generated"); + } + xmlFAParseQuantifier(ctxt); + return(1); +} + +/** + * xmlFAParseBranch: + * @ctxt: a regexp parser context + * @to: optional target to the end of the branch + * + * @to is used to optimize by removing duplicate path in automata + * in expressions like (a|b)(c|d) + * + * [2] branch ::= piece* + */ +static int +xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) { + xmlRegStatePtr previous; + int ret; + + previous = ctxt->state; + ret = xmlFAParsePiece(ctxt); + if (ret != 0) { + if (xmlFAGenerateTransitions(ctxt, previous, + (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0) + return(-1); + previous = ctxt->state; + ctxt->atom = NULL; + } + while ((ret != 0) && (ctxt->error == 0)) { + ret = xmlFAParsePiece(ctxt); + if (ret != 0) { + if (xmlFAGenerateTransitions(ctxt, previous, + (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0) + return(-1); + previous = ctxt->state; + ctxt->atom = NULL; + } + } + return(0); +} + +/** + * xmlFAParseRegExp: + * @ctxt: a regexp parser context + * @top: is this the top-level expression ? + * + * [1] regExp ::= branch ( '|' branch )* + */ +static void +xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) { + xmlRegStatePtr start, end; + + /* if not top start should have been generated by an epsilon trans */ + start = ctxt->state; + ctxt->end = NULL; + xmlFAParseBranch(ctxt, NULL); + if (top) { +#ifdef DEBUG_REGEXP_GRAPH + printf("State %d is final\n", ctxt->state->no); +#endif + ctxt->state->type = XML_REGEXP_FINAL_STATE; + } + if (CUR != '|') { + ctxt->end = ctxt->state; + return; + } + end = ctxt->state; + while ((CUR == '|') && (ctxt->error == 0)) { + NEXT; + ctxt->state = start; + ctxt->end = NULL; + xmlFAParseBranch(ctxt, end); + } + if (!top) { + ctxt->state = end; + ctxt->end = end; + } +} + +/************************************************************************ + * * + * The basic API * + * * + ************************************************************************/ + +/** + * xmlRegexpCompile: + * @regexp: a regular expression string + * + * Parses a regular expression conforming to XML Schemas Part 2 Datatype + * Appendix F and builds an automata suitable for testing strings against + * that regular expression + * + * Returns the compiled expression or NULL in case of error + */ +xmlRegexpPtr +xmlRegexpCompile(const xmlChar *regexp) { + xmlRegexpPtr ret; + xmlRegParserCtxtPtr ctxt; + + ctxt = xmlRegNewParserCtxt(regexp); + if (ctxt == NULL) + return(NULL); + + /* initialize the parser */ + ctxt->end = NULL; + ctxt->start = ctxt->state = xmlRegNewState(ctxt); + xmlRegStatePush(ctxt, ctxt->start); + + /* parse the expression building an automata */ + xmlFAParseRegExp(ctxt, 1); + if (CUR != 0) { + REGEXP_ERROR("xmlFAParseRegExp: extra characters"); + } + if (ctxt->error != 0) { + xmlRegFreeParserCtxt(ctxt); + return(NULL); + } + ctxt->end = ctxt->state; + ctxt->start->type = XML_REGEXP_START_STATE; + ctxt->end->type = XML_REGEXP_FINAL_STATE; + + /* remove the Epsilon except for counted transitions */ + xmlFAEliminateEpsilonTransitions(ctxt); + + + if (ctxt->error != 0) { + xmlRegFreeParserCtxt(ctxt); + return(NULL); + } + ret = xmlRegEpxFromParse(ctxt); + xmlRegFreeParserCtxt(ctxt); + return(ret); +} + +/** + * xmlRegexpExec: + * @comp: the compiled regular expression + * @content: the value to check against the regular expression + * + * Check if the regular expression generates the value + * + * Returns 1 if it matches, 0 if not and a negative value in case of error + */ +int +xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) { + if ((comp == NULL) || (content == NULL)) + return(-1); + return(xmlFARegExec(comp, content)); +} + +/** + * xmlRegexpIsDeterminist: + * @comp: the compiled regular expression + * + * Check if the regular expression is determinist + * + * Returns 1 if it yes, 0 if not and a negative value in case of error + */ +int +xmlRegexpIsDeterminist(xmlRegexpPtr comp) { + xmlAutomataPtr am; + int ret; + + if (comp == NULL) + return(-1); + if (comp->determinist != -1) + return(comp->determinist); + + am = xmlNewAutomata(); + if (am->states != NULL) { + int i; + + for (i = 0;i < am->nbStates;i++) + xmlRegFreeState(am->states[i]); + REGEXP_FREE(am->states); + } + am->nbAtoms = comp->nbAtoms; + am->atoms = comp->atoms; + am->nbStates = comp->nbStates; + am->states = comp->states; + am->determinist = -1; + am->flags = comp->flags; + ret = xmlFAComputesDeterminism(am); + am->atoms = NULL; + am->states = NULL; + xmlFreeAutomata(am); + comp->determinist = ret; + return(ret); +} + +/** + * xmlRegFreeRegexp: + * @regexp: the regexp + * + * Free a regexp + */ +void +xmlRegFreeRegexp(xmlRegexpPtr regexp) { + int i; + if (regexp == NULL) + return; + + if (regexp->string != NULL) + REGEXP_FREE(regexp->string); + if (regexp->states != NULL) { + for (i = 0;i < regexp->nbStates;i++) + xmlRegFreeState(regexp->states[i]); + REGEXP_FREE(regexp->states); + } + if (regexp->atoms != NULL) { + for (i = 0;i < regexp->nbAtoms;i++) + xmlRegFreeAtom(regexp->atoms[i]); + REGEXP_FREE(regexp->atoms); + } + if (regexp->counters != NULL) + REGEXP_FREE(regexp->counters); + if (regexp->compact != NULL) + REGEXP_FREE(regexp->compact); + if (regexp->transdata != NULL) + REGEXP_FREE(regexp->transdata); + if (regexp->stringMap != NULL) { + for (i = 0; i < regexp->nbstrings;i++) + REGEXP_FREE(regexp->stringMap[i]); + REGEXP_FREE(regexp->stringMap); + } + + REGEXP_FREE(regexp); +} + +/************************************************************************ + * * + * The Automata interface * + * * + ************************************************************************/ + +/** + * xmlNewAutomata: + * + * Create a new automata + * + * Returns the new object or NULL in case of failure + */ +static xmlAutomataPtr +xmlNewAutomata(void) { + xmlAutomataPtr ctxt; + + ctxt = xmlRegNewParserCtxt(NULL); + if (ctxt == NULL) + return(NULL); + + /* initialize the parser */ + ctxt->end = NULL; + ctxt->start = ctxt->state = xmlRegNewState(ctxt); + if (ctxt->start == NULL) { + xmlFreeAutomata(ctxt); + return(NULL); + } + ctxt->start->type = XML_REGEXP_START_STATE; + if (xmlRegStatePush(ctxt, ctxt->start) < 0) { + xmlRegFreeState(ctxt->start); + xmlFreeAutomata(ctxt); + return(NULL); + } + ctxt->flags = 0; + + return(ctxt); +} + +/** + * xmlFreeAutomata: + * @am: an automata + * + * Free an automata + */ +static void +xmlFreeAutomata(xmlAutomataPtr am) { + if (am == NULL) + return; + xmlRegFreeParserCtxt(am); +} + +#include diff --git a/libxsde/xsde/c/regexp/xmlregexp.h b/libxsde/xsde/c/regexp/xmlregexp.h new file mode 100644 index 0000000..948e3ca --- /dev/null +++ b/libxsde/xsde/c/regexp/xmlregexp.h @@ -0,0 +1,39 @@ +/* + * Summary: regular expressions handling + * Description: basic API for libxml regular expressions handling used + * for XML Schemas and validation. + * + * See COPYING for the status of this software. + */ + +#ifndef __XML_REGEXP_H__ +#define __XML_REGEXP_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * xmlRegexpPtr: + * + * A libxml regular expression, they can actually be far more complex + * thank the POSIX regex expressions. + */ +typedef struct _xmlRegexp xmlRegexp; +typedef xmlRegexp *xmlRegexpPtr; + +typedef unsigned char xmlChar; + +/* + * The POSIX like API + */ +xmlRegexpPtr xmlRegexpCompile (const xmlChar *regexp); +void xmlRegFreeRegexp (xmlRegexpPtr regexp); +int xmlRegexpExec (xmlRegexpPtr comp, const xmlChar *value); +int xmlRegexpIsDeterminist (xmlRegexpPtr comp); + +#ifdef __cplusplus +} +#endif + +#endif /*__XML_REGEXP_H__ */ diff --git a/libxsde/xsde/c/regexp/xmlunicode.c b/libxsde/xsde/c/regexp/xmlunicode.c new file mode 100644 index 0000000..848c148 --- /dev/null +++ b/libxsde/xsde/c/regexp/xmlunicode.c @@ -0,0 +1,3172 @@ +/* + * xmlunicode.c: this module implements the Unicode character APIs + * + * This file is automatically generated from the + * UCS description files of the Unicode Character Database + * http://www.unicode.org/Public/4.0-Update1/UCD-4.0.1.html + * using the genUnicode.py Python script. + * + * Generation date: Mon Mar 27 11:09:52 2006 + * Sources: Blocks-4.0.1.txt UnicodeData-4.0.1.txt + */ +#include + +#include + +#include "xmlunicode.h" +#include "chvalid.h" + +typedef int (xmlIntFunc)(int); /* just to keep one's mind untwisted */ + +typedef struct { + const char *rangename; + xmlIntFunc *func; +} xmlUnicodeRange; + +typedef struct { + xmlUnicodeRange *table; + int numentries; +} xmlUnicodeNameTable; + + +static xmlIntFunc *xmlUnicodeLookup(xmlUnicodeNameTable *tptr, const char *tname); + +static xmlUnicodeRange xmlUnicodeBlocks[] = { + {"AegeanNumbers", xmlUCSIsAegeanNumbers}, + {"AlphabeticPresentationForms", xmlUCSIsAlphabeticPresentationForms}, + {"Arabic", xmlUCSIsArabic}, + {"ArabicPresentationForms-A", xmlUCSIsArabicPresentationFormsA}, + {"ArabicPresentationForms-B", xmlUCSIsArabicPresentationFormsB}, + {"Armenian", xmlUCSIsArmenian}, + {"Arrows", xmlUCSIsArrows}, + {"BasicLatin", xmlUCSIsBasicLatin}, + {"Bengali", xmlUCSIsBengali}, + {"BlockElements", xmlUCSIsBlockElements}, + {"Bopomofo", xmlUCSIsBopomofo}, + {"BopomofoExtended", xmlUCSIsBopomofoExtended}, + {"BoxDrawing", xmlUCSIsBoxDrawing}, + {"BraillePatterns", xmlUCSIsBraillePatterns}, + {"Buhid", xmlUCSIsBuhid}, + {"ByzantineMusicalSymbols", xmlUCSIsByzantineMusicalSymbols}, + {"CJKCompatibility", xmlUCSIsCJKCompatibility}, + {"CJKCompatibilityForms", xmlUCSIsCJKCompatibilityForms}, + {"CJKCompatibilityIdeographs", xmlUCSIsCJKCompatibilityIdeographs}, + {"CJKCompatibilityIdeographsSupplement", xmlUCSIsCJKCompatibilityIdeographsSupplement}, + {"CJKRadicalsSupplement", xmlUCSIsCJKRadicalsSupplement}, + {"CJKSymbolsandPunctuation", xmlUCSIsCJKSymbolsandPunctuation}, + {"CJKUnifiedIdeographs", xmlUCSIsCJKUnifiedIdeographs}, + {"CJKUnifiedIdeographsExtensionA", xmlUCSIsCJKUnifiedIdeographsExtensionA}, + {"CJKUnifiedIdeographsExtensionB", xmlUCSIsCJKUnifiedIdeographsExtensionB}, + {"Cherokee", xmlUCSIsCherokee}, + {"CombiningDiacriticalMarks", xmlUCSIsCombiningDiacriticalMarks}, + {"CombiningDiacriticalMarksforSymbols", xmlUCSIsCombiningDiacriticalMarksforSymbols}, + {"CombiningHalfMarks", xmlUCSIsCombiningHalfMarks}, + {"CombiningMarksforSymbols", xmlUCSIsCombiningMarksforSymbols}, + {"ControlPictures", xmlUCSIsControlPictures}, + {"CurrencySymbols", xmlUCSIsCurrencySymbols}, + {"CypriotSyllabary", xmlUCSIsCypriotSyllabary}, + {"Cyrillic", xmlUCSIsCyrillic}, + {"CyrillicSupplement", xmlUCSIsCyrillicSupplement}, + {"Deseret", xmlUCSIsDeseret}, + {"Devanagari", xmlUCSIsDevanagari}, + {"Dingbats", xmlUCSIsDingbats}, + {"EnclosedAlphanumerics", xmlUCSIsEnclosedAlphanumerics}, + {"EnclosedCJKLettersandMonths", xmlUCSIsEnclosedCJKLettersandMonths}, + {"Ethiopic", xmlUCSIsEthiopic}, + {"GeneralPunctuation", xmlUCSIsGeneralPunctuation}, + {"GeometricShapes", xmlUCSIsGeometricShapes}, + {"Georgian", xmlUCSIsGeorgian}, + {"Gothic", xmlUCSIsGothic}, + {"Greek", xmlUCSIsGreek}, + {"GreekExtended", xmlUCSIsGreekExtended}, + {"GreekandCoptic", xmlUCSIsGreekandCoptic}, + {"Gujarati", xmlUCSIsGujarati}, + {"Gurmukhi", xmlUCSIsGurmukhi}, + {"HalfwidthandFullwidthForms", xmlUCSIsHalfwidthandFullwidthForms}, + {"HangulCompatibilityJamo", xmlUCSIsHangulCompatibilityJamo}, + {"HangulJamo", xmlUCSIsHangulJamo}, + {"HangulSyllables", xmlUCSIsHangulSyllables}, + {"Hanunoo", xmlUCSIsHanunoo}, + {"Hebrew", xmlUCSIsHebrew}, + {"HighPrivateUseSurrogates", xmlUCSIsHighPrivateUseSurrogates}, + {"HighSurrogates", xmlUCSIsHighSurrogates}, + {"Hiragana", xmlUCSIsHiragana}, + {"IPAExtensions", xmlUCSIsIPAExtensions}, + {"IdeographicDescriptionCharacters", xmlUCSIsIdeographicDescriptionCharacters}, + {"Kanbun", xmlUCSIsKanbun}, + {"KangxiRadicals", xmlUCSIsKangxiRadicals}, + {"Kannada", xmlUCSIsKannada}, + {"Katakana", xmlUCSIsKatakana}, + {"KatakanaPhoneticExtensions", xmlUCSIsKatakanaPhoneticExtensions}, + {"Khmer", xmlUCSIsKhmer}, + {"KhmerSymbols", xmlUCSIsKhmerSymbols}, + {"Lao", xmlUCSIsLao}, + {"Latin-1Supplement", xmlUCSIsLatin1Supplement}, + {"LatinExtended-A", xmlUCSIsLatinExtendedA}, + {"LatinExtended-B", xmlUCSIsLatinExtendedB}, + {"LatinExtendedAdditional", xmlUCSIsLatinExtendedAdditional}, + {"LetterlikeSymbols", xmlUCSIsLetterlikeSymbols}, + {"Limbu", xmlUCSIsLimbu}, + {"LinearBIdeograms", xmlUCSIsLinearBIdeograms}, + {"LinearBSyllabary", xmlUCSIsLinearBSyllabary}, + {"LowSurrogates", xmlUCSIsLowSurrogates}, + {"Malayalam", xmlUCSIsMalayalam}, + {"MathematicalAlphanumericSymbols", xmlUCSIsMathematicalAlphanumericSymbols}, + {"MathematicalOperators", xmlUCSIsMathematicalOperators}, + {"MiscellaneousMathematicalSymbols-A", xmlUCSIsMiscellaneousMathematicalSymbolsA}, + {"MiscellaneousMathematicalSymbols-B", xmlUCSIsMiscellaneousMathematicalSymbolsB}, + {"MiscellaneousSymbols", xmlUCSIsMiscellaneousSymbols}, + {"MiscellaneousSymbolsandArrows", xmlUCSIsMiscellaneousSymbolsandArrows}, + {"MiscellaneousTechnical", xmlUCSIsMiscellaneousTechnical}, + {"Mongolian", xmlUCSIsMongolian}, + {"MusicalSymbols", xmlUCSIsMusicalSymbols}, + {"Myanmar", xmlUCSIsMyanmar}, + {"NumberForms", xmlUCSIsNumberForms}, + {"Ogham", xmlUCSIsOgham}, + {"OldItalic", xmlUCSIsOldItalic}, + {"OpticalCharacterRecognition", xmlUCSIsOpticalCharacterRecognition}, + {"Oriya", xmlUCSIsOriya}, + {"Osmanya", xmlUCSIsOsmanya}, + {"PhoneticExtensions", xmlUCSIsPhoneticExtensions}, + {"PrivateUse", xmlUCSIsPrivateUse}, + {"PrivateUseArea", xmlUCSIsPrivateUseArea}, + {"Runic", xmlUCSIsRunic}, + {"Shavian", xmlUCSIsShavian}, + {"Sinhala", xmlUCSIsSinhala}, + {"SmallFormVariants", xmlUCSIsSmallFormVariants}, + {"SpacingModifierLetters", xmlUCSIsSpacingModifierLetters}, + {"Specials", xmlUCSIsSpecials}, + {"SuperscriptsandSubscripts", xmlUCSIsSuperscriptsandSubscripts}, + {"SupplementalArrows-A", xmlUCSIsSupplementalArrowsA}, + {"SupplementalArrows-B", xmlUCSIsSupplementalArrowsB}, + {"SupplementalMathematicalOperators", xmlUCSIsSupplementalMathematicalOperators}, + {"SupplementaryPrivateUseArea-A", xmlUCSIsSupplementaryPrivateUseAreaA}, + {"SupplementaryPrivateUseArea-B", xmlUCSIsSupplementaryPrivateUseAreaB}, + {"Syriac", xmlUCSIsSyriac}, + {"Tagalog", xmlUCSIsTagalog}, + {"Tagbanwa", xmlUCSIsTagbanwa}, + {"Tags", xmlUCSIsTags}, + {"TaiLe", xmlUCSIsTaiLe}, + {"TaiXuanJingSymbols", xmlUCSIsTaiXuanJingSymbols}, + {"Tamil", xmlUCSIsTamil}, + {"Telugu", xmlUCSIsTelugu}, + {"Thaana", xmlUCSIsThaana}, + {"Thai", xmlUCSIsThai}, + {"Tibetan", xmlUCSIsTibetan}, + {"Ugaritic", xmlUCSIsUgaritic}, + {"UnifiedCanadianAboriginalSyllabics", xmlUCSIsUnifiedCanadianAboriginalSyllabics}, + {"VariationSelectors", xmlUCSIsVariationSelectors}, + {"VariationSelectorsSupplement", xmlUCSIsVariationSelectorsSupplement}, + {"YiRadicals", xmlUCSIsYiRadicals}, + {"YiSyllables", xmlUCSIsYiSyllables}, + {"YijingHexagramSymbols", xmlUCSIsYijingHexagramSymbols}}; + +static xmlUnicodeRange xmlUnicodeCats[] = { + {"C", xmlUCSIsCatC}, + {"Cc", xmlUCSIsCatCc}, + {"Cf", xmlUCSIsCatCf}, + {"Co", xmlUCSIsCatCo}, + {"Cs", xmlUCSIsCatCs}, + {"L", xmlUCSIsCatL}, + {"Ll", xmlUCSIsCatLl}, + {"Lm", xmlUCSIsCatLm}, + {"Lo", xmlUCSIsCatLo}, + {"Lt", xmlUCSIsCatLt}, + {"Lu", xmlUCSIsCatLu}, + {"M", xmlUCSIsCatM}, + {"Mc", xmlUCSIsCatMc}, + {"Me", xmlUCSIsCatMe}, + {"Mn", xmlUCSIsCatMn}, + {"N", xmlUCSIsCatN}, + {"Nd", xmlUCSIsCatNd}, + {"Nl", xmlUCSIsCatNl}, + {"No", xmlUCSIsCatNo}, + {"P", xmlUCSIsCatP}, + {"Pc", xmlUCSIsCatPc}, + {"Pd", xmlUCSIsCatPd}, + {"Pe", xmlUCSIsCatPe}, + {"Pf", xmlUCSIsCatPf}, + {"Pi", xmlUCSIsCatPi}, + {"Po", xmlUCSIsCatPo}, + {"Ps", xmlUCSIsCatPs}, + {"S", xmlUCSIsCatS}, + {"Sc", xmlUCSIsCatSc}, + {"Sk", xmlUCSIsCatSk}, + {"Sm", xmlUCSIsCatSm}, + {"So", xmlUCSIsCatSo}, + {"Z", xmlUCSIsCatZ}, + {"Zl", xmlUCSIsCatZl}, + {"Zp", xmlUCSIsCatZp}, + {"Zs", xmlUCSIsCatZs}}; + +static const xmlChSRange xmlCS[] = {{0x0, 0x1f}, {0x7f, 0x9f}, + {0xad, 0xad}, {0x600, 0x603}, {0x6dd, 0x6dd}, {0x70f, 0x70f}, + {0x17b4, 0x17b5}, {0x200b, 0x200f}, {0x202a, 0x202e}, {0x2060, 0x2063}, + {0x206a, 0x206f}, {0xd800, 0xd800}, {0xdb7f, 0xdb80}, {0xdbff, 0xdc00}, + {0xdfff, 0xe000}, {0xf8ff, 0xf8ff}, {0xfeff, 0xfeff}, {0xfff9, 0xfffb} }; +static const xmlChLRange xmlCL[] = {{0x1d173, 0x1d17a}, {0xe0001, 0xe0001}, + {0xe0020, 0xe007f}, {0xf0000, 0xf0000}, {0xffffd, 0xffffd}, + {0x100000, 0x100000}, {0x10fffd, 0x10fffd} }; +static xmlChRangeGroup xmlCG = {18,7,xmlCS,xmlCL}; + +static const xmlChSRange xmlCfS[] = {{0xad, 0xad}, {0x600, 0x603}, + {0x6dd, 0x6dd}, {0x70f, 0x70f}, {0x17b4, 0x17b5}, {0x200b, 0x200f}, + {0x202a, 0x202e}, {0x2060, 0x2063}, {0x206a, 0x206f}, {0xfeff, 0xfeff}, + {0xfff9, 0xfffb} }; +static const xmlChLRange xmlCfL[] = {{0x1d173, 0x1d17a}, {0xe0001, 0xe0001}, + {0xe0020, 0xe007f} }; +static xmlChRangeGroup xmlCfG = {11,3,xmlCfS,xmlCfL}; + +static const xmlChSRange xmlLS[] = {{0x41, 0x5a}, {0x61, 0x7a}, + {0xaa, 0xaa}, {0xb5, 0xb5}, {0xba, 0xba}, {0xc0, 0xd6}, {0xd8, 0xf6}, + {0xf8, 0x236}, {0x250, 0x2c1}, {0x2c6, 0x2d1}, {0x2e0, 0x2e4}, + {0x2ee, 0x2ee}, {0x37a, 0x37a}, {0x386, 0x386}, {0x388, 0x38a}, + {0x38c, 0x38c}, {0x38e, 0x3a1}, {0x3a3, 0x3ce}, {0x3d0, 0x3f5}, + {0x3f7, 0x3fb}, {0x400, 0x481}, {0x48a, 0x4ce}, {0x4d0, 0x4f5}, + {0x4f8, 0x4f9}, {0x500, 0x50f}, {0x531, 0x556}, {0x559, 0x559}, + {0x561, 0x587}, {0x5d0, 0x5ea}, {0x5f0, 0x5f2}, {0x621, 0x63a}, + {0x640, 0x64a}, {0x66e, 0x66f}, {0x671, 0x6d3}, {0x6d5, 0x6d5}, + {0x6e5, 0x6e6}, {0x6ee, 0x6ef}, {0x6fa, 0x6fc}, {0x6ff, 0x6ff}, + {0x710, 0x710}, {0x712, 0x72f}, {0x74d, 0x74f}, {0x780, 0x7a5}, + {0x7b1, 0x7b1}, {0x904, 0x939}, {0x93d, 0x93d}, {0x950, 0x950}, + {0x958, 0x961}, {0x985, 0x98c}, {0x98f, 0x990}, {0x993, 0x9a8}, + {0x9aa, 0x9b0}, {0x9b2, 0x9b2}, {0x9b6, 0x9b9}, {0x9bd, 0x9bd}, + {0x9dc, 0x9dd}, {0x9df, 0x9e1}, {0x9f0, 0x9f1}, {0xa05, 0xa0a}, + {0xa0f, 0xa10}, {0xa13, 0xa28}, {0xa2a, 0xa30}, {0xa32, 0xa33}, + {0xa35, 0xa36}, {0xa38, 0xa39}, {0xa59, 0xa5c}, {0xa5e, 0xa5e}, + {0xa72, 0xa74}, {0xa85, 0xa8d}, {0xa8f, 0xa91}, {0xa93, 0xaa8}, + {0xaaa, 0xab0}, {0xab2, 0xab3}, {0xab5, 0xab9}, {0xabd, 0xabd}, + {0xad0, 0xad0}, {0xae0, 0xae1}, {0xb05, 0xb0c}, {0xb0f, 0xb10}, + {0xb13, 0xb28}, {0xb2a, 0xb30}, {0xb32, 0xb33}, {0xb35, 0xb39}, + {0xb3d, 0xb3d}, {0xb5c, 0xb5d}, {0xb5f, 0xb61}, {0xb71, 0xb71}, + {0xb83, 0xb83}, {0xb85, 0xb8a}, {0xb8e, 0xb90}, {0xb92, 0xb95}, + {0xb99, 0xb9a}, {0xb9c, 0xb9c}, {0xb9e, 0xb9f}, {0xba3, 0xba4}, + {0xba8, 0xbaa}, {0xbae, 0xbb5}, {0xbb7, 0xbb9}, {0xc05, 0xc0c}, + {0xc0e, 0xc10}, {0xc12, 0xc28}, {0xc2a, 0xc33}, {0xc35, 0xc39}, + {0xc60, 0xc61}, {0xc85, 0xc8c}, {0xc8e, 0xc90}, {0xc92, 0xca8}, + {0xcaa, 0xcb3}, {0xcb5, 0xcb9}, {0xcbd, 0xcbd}, {0xcde, 0xcde}, + {0xce0, 0xce1}, {0xd05, 0xd0c}, {0xd0e, 0xd10}, {0xd12, 0xd28}, + {0xd2a, 0xd39}, {0xd60, 0xd61}, {0xd85, 0xd96}, {0xd9a, 0xdb1}, + {0xdb3, 0xdbb}, {0xdbd, 0xdbd}, {0xdc0, 0xdc6}, {0xe01, 0xe30}, + {0xe32, 0xe33}, {0xe40, 0xe46}, {0xe81, 0xe82}, {0xe84, 0xe84}, + {0xe87, 0xe88}, {0xe8a, 0xe8a}, {0xe8d, 0xe8d}, {0xe94, 0xe97}, + {0xe99, 0xe9f}, {0xea1, 0xea3}, {0xea5, 0xea5}, {0xea7, 0xea7}, + {0xeaa, 0xeab}, {0xead, 0xeb0}, {0xeb2, 0xeb3}, {0xebd, 0xebd}, + {0xec0, 0xec4}, {0xec6, 0xec6}, {0xedc, 0xedd}, {0xf00, 0xf00}, + {0xf40, 0xf47}, {0xf49, 0xf6a}, {0xf88, 0xf8b}, {0x1000, 0x1021}, + {0x1023, 0x1027}, {0x1029, 0x102a}, {0x1050, 0x1055}, {0x10a0, 0x10c5}, + {0x10d0, 0x10f8}, {0x1100, 0x1159}, {0x115f, 0x11a2}, {0x11a8, 0x11f9}, + {0x1200, 0x1206}, {0x1208, 0x1246}, {0x1248, 0x1248}, {0x124a, 0x124d}, + {0x1250, 0x1256}, {0x1258, 0x1258}, {0x125a, 0x125d}, {0x1260, 0x1286}, + {0x1288, 0x1288}, {0x128a, 0x128d}, {0x1290, 0x12ae}, {0x12b0, 0x12b0}, + {0x12b2, 0x12b5}, {0x12b8, 0x12be}, {0x12c0, 0x12c0}, {0x12c2, 0x12c5}, + {0x12c8, 0x12ce}, {0x12d0, 0x12d6}, {0x12d8, 0x12ee}, {0x12f0, 0x130e}, + {0x1310, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x131e}, {0x1320, 0x1346}, + {0x1348, 0x135a}, {0x13a0, 0x13f4}, {0x1401, 0x166c}, {0x166f, 0x1676}, + {0x1681, 0x169a}, {0x16a0, 0x16ea}, {0x1700, 0x170c}, {0x170e, 0x1711}, + {0x1720, 0x1731}, {0x1740, 0x1751}, {0x1760, 0x176c}, {0x176e, 0x1770}, + {0x1780, 0x17b3}, {0x17d7, 0x17d7}, {0x17dc, 0x17dc}, {0x1820, 0x1877}, + {0x1880, 0x18a8}, {0x1900, 0x191c}, {0x1950, 0x196d}, {0x1970, 0x1974}, + {0x1d00, 0x1d6b}, {0x1e00, 0x1e9b}, {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15}, + {0x1f18, 0x1f1d}, {0x1f20, 0x1f45}, {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, + {0x1f59, 0x1f59}, {0x1f5b, 0x1f5b}, {0x1f5d, 0x1f5d}, {0x1f5f, 0x1f7d}, + {0x1f80, 0x1fb4}, {0x1fb6, 0x1fbc}, {0x1fbe, 0x1fbe}, {0x1fc2, 0x1fc4}, + {0x1fc6, 0x1fcc}, {0x1fd0, 0x1fd3}, {0x1fd6, 0x1fdb}, {0x1fe0, 0x1fec}, + {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffc}, {0x2071, 0x2071}, {0x207f, 0x207f}, + {0x2102, 0x2102}, {0x2107, 0x2107}, {0x210a, 0x2113}, {0x2115, 0x2115}, + {0x2119, 0x211d}, {0x2124, 0x2124}, {0x2126, 0x2126}, {0x2128, 0x2128}, + {0x212a, 0x212d}, {0x212f, 0x2131}, {0x2133, 0x2139}, {0x213d, 0x213f}, + {0x2145, 0x2149}, {0x3005, 0x3006}, {0x3031, 0x3035}, {0x303b, 0x303c}, + {0x3041, 0x3096}, {0x309d, 0x309f}, {0x30a1, 0x30fa}, {0x30fc, 0x30ff}, + {0x3105, 0x312c}, {0x3131, 0x318e}, {0x31a0, 0x31b7}, {0x31f0, 0x31ff}, + {0x3400, 0x3400}, {0x4db5, 0x4db5}, {0x4e00, 0x4e00}, {0x9fa5, 0x9fa5}, + {0xa000, 0xa48c}, {0xac00, 0xac00}, {0xd7a3, 0xd7a3}, {0xf900, 0xfa2d}, + {0xfa30, 0xfa6a}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1d, 0xfb1d}, + {0xfb1f, 0xfb28}, {0xfb2a, 0xfb36}, {0xfb38, 0xfb3c}, {0xfb3e, 0xfb3e}, + {0xfb40, 0xfb41}, {0xfb43, 0xfb44}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfd3d}, + {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb}, {0xfe70, 0xfe74}, + {0xfe76, 0xfefc}, {0xff21, 0xff3a}, {0xff41, 0xff5a}, {0xff66, 0xffbe}, + {0xffc2, 0xffc7}, {0xffca, 0xffcf}, {0xffd2, 0xffd7}, {0xffda, 0xffdc} }; +static const xmlChLRange xmlLL[] = {{0x10000, 0x1000b}, {0x1000d, 0x10026}, + {0x10028, 0x1003a}, {0x1003c, 0x1003d}, {0x1003f, 0x1004d}, + {0x10050, 0x1005d}, {0x10080, 0x100fa}, {0x10300, 0x1031e}, + {0x10330, 0x10349}, {0x10380, 0x1039d}, {0x10400, 0x1049d}, + {0x10800, 0x10805}, {0x10808, 0x10808}, {0x1080a, 0x10835}, + {0x10837, 0x10838}, {0x1083c, 0x1083c}, {0x1083f, 0x1083f}, + {0x1d400, 0x1d454}, {0x1d456, 0x1d49c}, {0x1d49e, 0x1d49f}, + {0x1d4a2, 0x1d4a2}, {0x1d4a5, 0x1d4a6}, {0x1d4a9, 0x1d4ac}, + {0x1d4ae, 0x1d4b9}, {0x1d4bb, 0x1d4bb}, {0x1d4bd, 0x1d4c3}, + {0x1d4c5, 0x1d505}, {0x1d507, 0x1d50a}, {0x1d50d, 0x1d514}, + {0x1d516, 0x1d51c}, {0x1d51e, 0x1d539}, {0x1d53b, 0x1d53e}, + {0x1d540, 0x1d544}, {0x1d546, 0x1d546}, {0x1d54a, 0x1d550}, + {0x1d552, 0x1d6a3}, {0x1d6a8, 0x1d6c0}, {0x1d6c2, 0x1d6da}, + {0x1d6dc, 0x1d6fa}, {0x1d6fc, 0x1d714}, {0x1d716, 0x1d734}, + {0x1d736, 0x1d74e}, {0x1d750, 0x1d76e}, {0x1d770, 0x1d788}, + {0x1d78a, 0x1d7a8}, {0x1d7aa, 0x1d7c2}, {0x1d7c4, 0x1d7c9}, + {0x20000, 0x20000}, {0x2a6d6, 0x2a6d6}, {0x2f800, 0x2fa1d} }; +static xmlChRangeGroup xmlLG = {279,50,xmlLS,xmlLL}; + +static const xmlChSRange xmlLlS[] = {{0x61, 0x7a}, {0xaa, 0xaa}, + {0xb5, 0xb5}, {0xba, 0xba}, {0xdf, 0xf6}, {0xf8, 0xff}, {0x101, 0x101}, + {0x103, 0x103}, {0x105, 0x105}, {0x107, 0x107}, {0x109, 0x109}, + {0x10b, 0x10b}, {0x10d, 0x10d}, {0x10f, 0x10f}, {0x111, 0x111}, + {0x113, 0x113}, {0x115, 0x115}, {0x117, 0x117}, {0x119, 0x119}, + {0x11b, 0x11b}, {0x11d, 0x11d}, {0x11f, 0x11f}, {0x121, 0x121}, + {0x123, 0x123}, {0x125, 0x125}, {0x127, 0x127}, {0x129, 0x129}, + {0x12b, 0x12b}, {0x12d, 0x12d}, {0x12f, 0x12f}, {0x131, 0x131}, + {0x133, 0x133}, {0x135, 0x135}, {0x137, 0x138}, {0x13a, 0x13a}, + {0x13c, 0x13c}, {0x13e, 0x13e}, {0x140, 0x140}, {0x142, 0x142}, + {0x144, 0x144}, {0x146, 0x146}, {0x148, 0x149}, {0x14b, 0x14b}, + {0x14d, 0x14d}, {0x14f, 0x14f}, {0x151, 0x151}, {0x153, 0x153}, + {0x155, 0x155}, {0x157, 0x157}, {0x159, 0x159}, {0x15b, 0x15b}, + {0x15d, 0x15d}, {0x15f, 0x15f}, {0x161, 0x161}, {0x163, 0x163}, + {0x165, 0x165}, {0x167, 0x167}, {0x169, 0x169}, {0x16b, 0x16b}, + {0x16d, 0x16d}, {0x16f, 0x16f}, {0x171, 0x171}, {0x173, 0x173}, + {0x175, 0x175}, {0x177, 0x177}, {0x17a, 0x17a}, {0x17c, 0x17c}, + {0x17e, 0x180}, {0x183, 0x183}, {0x185, 0x185}, {0x188, 0x188}, + {0x18c, 0x18d}, {0x192, 0x192}, {0x195, 0x195}, {0x199, 0x19b}, + {0x19e, 0x19e}, {0x1a1, 0x1a1}, {0x1a3, 0x1a3}, {0x1a5, 0x1a5}, + {0x1a8, 0x1a8}, {0x1aa, 0x1ab}, {0x1ad, 0x1ad}, {0x1b0, 0x1b0}, + {0x1b4, 0x1b4}, {0x1b6, 0x1b6}, {0x1b9, 0x1ba}, {0x1bd, 0x1bf}, + {0x1c6, 0x1c6}, {0x1c9, 0x1c9}, {0x1cc, 0x1cc}, {0x1ce, 0x1ce}, + {0x1d0, 0x1d0}, {0x1d2, 0x1d2}, {0x1d4, 0x1d4}, {0x1d6, 0x1d6}, + {0x1d8, 0x1d8}, {0x1da, 0x1da}, {0x1dc, 0x1dd}, {0x1df, 0x1df}, + {0x1e1, 0x1e1}, {0x1e3, 0x1e3}, {0x1e5, 0x1e5}, {0x1e7, 0x1e7}, + {0x1e9, 0x1e9}, {0x1eb, 0x1eb}, {0x1ed, 0x1ed}, {0x1ef, 0x1f0}, + {0x1f3, 0x1f3}, {0x1f5, 0x1f5}, {0x1f9, 0x1f9}, {0x1fb, 0x1fb}, + {0x1fd, 0x1fd}, {0x1ff, 0x1ff}, {0x201, 0x201}, {0x203, 0x203}, + {0x205, 0x205}, {0x207, 0x207}, {0x209, 0x209}, {0x20b, 0x20b}, + {0x20d, 0x20d}, {0x20f, 0x20f}, {0x211, 0x211}, {0x213, 0x213}, + {0x215, 0x215}, {0x217, 0x217}, {0x219, 0x219}, {0x21b, 0x21b}, + {0x21d, 0x21d}, {0x21f, 0x21f}, {0x221, 0x221}, {0x223, 0x223}, + {0x225, 0x225}, {0x227, 0x227}, {0x229, 0x229}, {0x22b, 0x22b}, + {0x22d, 0x22d}, {0x22f, 0x22f}, {0x231, 0x231}, {0x233, 0x236}, + {0x250, 0x2af}, {0x390, 0x390}, {0x3ac, 0x3ce}, {0x3d0, 0x3d1}, + {0x3d5, 0x3d7}, {0x3d9, 0x3d9}, {0x3db, 0x3db}, {0x3dd, 0x3dd}, + {0x3df, 0x3df}, {0x3e1, 0x3e1}, {0x3e3, 0x3e3}, {0x3e5, 0x3e5}, + {0x3e7, 0x3e7}, {0x3e9, 0x3e9}, {0x3eb, 0x3eb}, {0x3ed, 0x3ed}, + {0x3ef, 0x3f3}, {0x3f5, 0x3f5}, {0x3f8, 0x3f8}, {0x3fb, 0x3fb}, + {0x430, 0x45f}, {0x461, 0x461}, {0x463, 0x463}, {0x465, 0x465}, + {0x467, 0x467}, {0x469, 0x469}, {0x46b, 0x46b}, {0x46d, 0x46d}, + {0x46f, 0x46f}, {0x471, 0x471}, {0x473, 0x473}, {0x475, 0x475}, + {0x477, 0x477}, {0x479, 0x479}, {0x47b, 0x47b}, {0x47d, 0x47d}, + {0x47f, 0x47f}, {0x481, 0x481}, {0x48b, 0x48b}, {0x48d, 0x48d}, + {0x48f, 0x48f}, {0x491, 0x491}, {0x493, 0x493}, {0x495, 0x495}, + {0x497, 0x497}, {0x499, 0x499}, {0x49b, 0x49b}, {0x49d, 0x49d}, + {0x49f, 0x49f}, {0x4a1, 0x4a1}, {0x4a3, 0x4a3}, {0x4a5, 0x4a5}, + {0x4a7, 0x4a7}, {0x4a9, 0x4a9}, {0x4ab, 0x4ab}, {0x4ad, 0x4ad}, + {0x4af, 0x4af}, {0x4b1, 0x4b1}, {0x4b3, 0x4b3}, {0x4b5, 0x4b5}, + {0x4b7, 0x4b7}, {0x4b9, 0x4b9}, {0x4bb, 0x4bb}, {0x4bd, 0x4bd}, + {0x4bf, 0x4bf}, {0x4c2, 0x4c2}, {0x4c4, 0x4c4}, {0x4c6, 0x4c6}, + {0x4c8, 0x4c8}, {0x4ca, 0x4ca}, {0x4cc, 0x4cc}, {0x4ce, 0x4ce}, + {0x4d1, 0x4d1}, {0x4d3, 0x4d3}, {0x4d5, 0x4d5}, {0x4d7, 0x4d7}, + {0x4d9, 0x4d9}, {0x4db, 0x4db}, {0x4dd, 0x4dd}, {0x4df, 0x4df}, + {0x4e1, 0x4e1}, {0x4e3, 0x4e3}, {0x4e5, 0x4e5}, {0x4e7, 0x4e7}, + {0x4e9, 0x4e9}, {0x4eb, 0x4eb}, {0x4ed, 0x4ed}, {0x4ef, 0x4ef}, + {0x4f1, 0x4f1}, {0x4f3, 0x4f3}, {0x4f5, 0x4f5}, {0x4f9, 0x4f9}, + {0x501, 0x501}, {0x503, 0x503}, {0x505, 0x505}, {0x507, 0x507}, + {0x509, 0x509}, {0x50b, 0x50b}, {0x50d, 0x50d}, {0x50f, 0x50f}, + {0x561, 0x587}, {0x1d00, 0x1d2b}, {0x1d62, 0x1d6b}, {0x1e01, 0x1e01}, + {0x1e03, 0x1e03}, {0x1e05, 0x1e05}, {0x1e07, 0x1e07}, {0x1e09, 0x1e09}, + {0x1e0b, 0x1e0b}, {0x1e0d, 0x1e0d}, {0x1e0f, 0x1e0f}, {0x1e11, 0x1e11}, + {0x1e13, 0x1e13}, {0x1e15, 0x1e15}, {0x1e17, 0x1e17}, {0x1e19, 0x1e19}, + {0x1e1b, 0x1e1b}, {0x1e1d, 0x1e1d}, {0x1e1f, 0x1e1f}, {0x1e21, 0x1e21}, + {0x1e23, 0x1e23}, {0x1e25, 0x1e25}, {0x1e27, 0x1e27}, {0x1e29, 0x1e29}, + {0x1e2b, 0x1e2b}, {0x1e2d, 0x1e2d}, {0x1e2f, 0x1e2f}, {0x1e31, 0x1e31}, + {0x1e33, 0x1e33}, {0x1e35, 0x1e35}, {0x1e37, 0x1e37}, {0x1e39, 0x1e39}, + {0x1e3b, 0x1e3b}, {0x1e3d, 0x1e3d}, {0x1e3f, 0x1e3f}, {0x1e41, 0x1e41}, + {0x1e43, 0x1e43}, {0x1e45, 0x1e45}, {0x1e47, 0x1e47}, {0x1e49, 0x1e49}, + {0x1e4b, 0x1e4b}, {0x1e4d, 0x1e4d}, {0x1e4f, 0x1e4f}, {0x1e51, 0x1e51}, + {0x1e53, 0x1e53}, {0x1e55, 0x1e55}, {0x1e57, 0x1e57}, {0x1e59, 0x1e59}, + {0x1e5b, 0x1e5b}, {0x1e5d, 0x1e5d}, {0x1e5f, 0x1e5f}, {0x1e61, 0x1e61}, + {0x1e63, 0x1e63}, {0x1e65, 0x1e65}, {0x1e67, 0x1e67}, {0x1e69, 0x1e69}, + {0x1e6b, 0x1e6b}, {0x1e6d, 0x1e6d}, {0x1e6f, 0x1e6f}, {0x1e71, 0x1e71}, + {0x1e73, 0x1e73}, {0x1e75, 0x1e75}, {0x1e77, 0x1e77}, {0x1e79, 0x1e79}, + {0x1e7b, 0x1e7b}, {0x1e7d, 0x1e7d}, {0x1e7f, 0x1e7f}, {0x1e81, 0x1e81}, + {0x1e83, 0x1e83}, {0x1e85, 0x1e85}, {0x1e87, 0x1e87}, {0x1e89, 0x1e89}, + {0x1e8b, 0x1e8b}, {0x1e8d, 0x1e8d}, {0x1e8f, 0x1e8f}, {0x1e91, 0x1e91}, + {0x1e93, 0x1e93}, {0x1e95, 0x1e9b}, {0x1ea1, 0x1ea1}, {0x1ea3, 0x1ea3}, + {0x1ea5, 0x1ea5}, {0x1ea7, 0x1ea7}, {0x1ea9, 0x1ea9}, {0x1eab, 0x1eab}, + {0x1ead, 0x1ead}, {0x1eaf, 0x1eaf}, {0x1eb1, 0x1eb1}, {0x1eb3, 0x1eb3}, + {0x1eb5, 0x1eb5}, {0x1eb7, 0x1eb7}, {0x1eb9, 0x1eb9}, {0x1ebb, 0x1ebb}, + {0x1ebd, 0x1ebd}, {0x1ebf, 0x1ebf}, {0x1ec1, 0x1ec1}, {0x1ec3, 0x1ec3}, + {0x1ec5, 0x1ec5}, {0x1ec7, 0x1ec7}, {0x1ec9, 0x1ec9}, {0x1ecb, 0x1ecb}, + {0x1ecd, 0x1ecd}, {0x1ecf, 0x1ecf}, {0x1ed1, 0x1ed1}, {0x1ed3, 0x1ed3}, + {0x1ed5, 0x1ed5}, {0x1ed7, 0x1ed7}, {0x1ed9, 0x1ed9}, {0x1edb, 0x1edb}, + {0x1edd, 0x1edd}, {0x1edf, 0x1edf}, {0x1ee1, 0x1ee1}, {0x1ee3, 0x1ee3}, + {0x1ee5, 0x1ee5}, {0x1ee7, 0x1ee7}, {0x1ee9, 0x1ee9}, {0x1eeb, 0x1eeb}, + {0x1eed, 0x1eed}, {0x1eef, 0x1eef}, {0x1ef1, 0x1ef1}, {0x1ef3, 0x1ef3}, + {0x1ef5, 0x1ef5}, {0x1ef7, 0x1ef7}, {0x1ef9, 0x1ef9}, {0x1f00, 0x1f07}, + {0x1f10, 0x1f15}, {0x1f20, 0x1f27}, {0x1f30, 0x1f37}, {0x1f40, 0x1f45}, + {0x1f50, 0x1f57}, {0x1f60, 0x1f67}, {0x1f70, 0x1f7d}, {0x1f80, 0x1f87}, + {0x1f90, 0x1f97}, {0x1fa0, 0x1fa7}, {0x1fb0, 0x1fb4}, {0x1fb6, 0x1fb7}, + {0x1fbe, 0x1fbe}, {0x1fc2, 0x1fc4}, {0x1fc6, 0x1fc7}, {0x1fd0, 0x1fd3}, + {0x1fd6, 0x1fd7}, {0x1fe0, 0x1fe7}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ff7}, + {0x2071, 0x2071}, {0x207f, 0x207f}, {0x210a, 0x210a}, {0x210e, 0x210f}, + {0x2113, 0x2113}, {0x212f, 0x212f}, {0x2134, 0x2134}, {0x2139, 0x2139}, + {0x213d, 0x213d}, {0x2146, 0x2149}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, + {0xff41, 0xff5a} }; +static const xmlChLRange xmlLlL[] = {{0x10428, 0x1044f}, {0x1d41a, 0x1d433}, + {0x1d44e, 0x1d454}, {0x1d456, 0x1d467}, {0x1d482, 0x1d49b}, + {0x1d4b6, 0x1d4b9}, {0x1d4bb, 0x1d4bb}, {0x1d4bd, 0x1d4c3}, + {0x1d4c5, 0x1d4cf}, {0x1d4ea, 0x1d503}, {0x1d51e, 0x1d537}, + {0x1d552, 0x1d56b}, {0x1d586, 0x1d59f}, {0x1d5ba, 0x1d5d3}, + {0x1d5ee, 0x1d607}, {0x1d622, 0x1d63b}, {0x1d656, 0x1d66f}, + {0x1d68a, 0x1d6a3}, {0x1d6c2, 0x1d6da}, {0x1d6dc, 0x1d6e1}, + {0x1d6fc, 0x1d714}, {0x1d716, 0x1d71b}, {0x1d736, 0x1d74e}, + {0x1d750, 0x1d755}, {0x1d770, 0x1d788}, {0x1d78a, 0x1d78f}, + {0x1d7aa, 0x1d7c2}, {0x1d7c4, 0x1d7c9} }; +static xmlChRangeGroup xmlLlG = {396,28,xmlLlS,xmlLlL}; + +static const xmlChSRange xmlLmS[] = {{0x2b0, 0x2c1}, {0x2c6, 0x2d1}, + {0x2e0, 0x2e4}, {0x2ee, 0x2ee}, {0x37a, 0x37a}, {0x559, 0x559}, + {0x640, 0x640}, {0x6e5, 0x6e6}, {0xe46, 0xe46}, {0xec6, 0xec6}, + {0x17d7, 0x17d7}, {0x1843, 0x1843}, {0x1d2c, 0x1d61}, {0x3005, 0x3005}, + {0x3031, 0x3035}, {0x303b, 0x303b}, {0x309d, 0x309e}, {0x30fc, 0x30fe}, + {0xff70, 0xff70}, {0xff9e, 0xff9f} }; +static xmlChRangeGroup xmlLmG = {20,0,xmlLmS,NULL}; + +static const xmlChSRange xmlLoS[] = {{0x1bb, 0x1bb}, {0x1c0, 0x1c3}, + {0x5d0, 0x5ea}, {0x5f0, 0x5f2}, {0x621, 0x63a}, {0x641, 0x64a}, + {0x66e, 0x66f}, {0x671, 0x6d3}, {0x6d5, 0x6d5}, {0x6ee, 0x6ef}, + {0x6fa, 0x6fc}, {0x6ff, 0x6ff}, {0x710, 0x710}, {0x712, 0x72f}, + {0x74d, 0x74f}, {0x780, 0x7a5}, {0x7b1, 0x7b1}, {0x904, 0x939}, + {0x93d, 0x93d}, {0x950, 0x950}, {0x958, 0x961}, {0x985, 0x98c}, + {0x98f, 0x990}, {0x993, 0x9a8}, {0x9aa, 0x9b0}, {0x9b2, 0x9b2}, + {0x9b6, 0x9b9}, {0x9bd, 0x9bd}, {0x9dc, 0x9dd}, {0x9df, 0x9e1}, + {0x9f0, 0x9f1}, {0xa05, 0xa0a}, {0xa0f, 0xa10}, {0xa13, 0xa28}, + {0xa2a, 0xa30}, {0xa32, 0xa33}, {0xa35, 0xa36}, {0xa38, 0xa39}, + {0xa59, 0xa5c}, {0xa5e, 0xa5e}, {0xa72, 0xa74}, {0xa85, 0xa8d}, + {0xa8f, 0xa91}, {0xa93, 0xaa8}, {0xaaa, 0xab0}, {0xab2, 0xab3}, + {0xab5, 0xab9}, {0xabd, 0xabd}, {0xad0, 0xad0}, {0xae0, 0xae1}, + {0xb05, 0xb0c}, {0xb0f, 0xb10}, {0xb13, 0xb28}, {0xb2a, 0xb30}, + {0xb32, 0xb33}, {0xb35, 0xb39}, {0xb3d, 0xb3d}, {0xb5c, 0xb5d}, + {0xb5f, 0xb61}, {0xb71, 0xb71}, {0xb83, 0xb83}, {0xb85, 0xb8a}, + {0xb8e, 0xb90}, {0xb92, 0xb95}, {0xb99, 0xb9a}, {0xb9c, 0xb9c}, + {0xb9e, 0xb9f}, {0xba3, 0xba4}, {0xba8, 0xbaa}, {0xbae, 0xbb5}, + {0xbb7, 0xbb9}, {0xc05, 0xc0c}, {0xc0e, 0xc10}, {0xc12, 0xc28}, + {0xc2a, 0xc33}, {0xc35, 0xc39}, {0xc60, 0xc61}, {0xc85, 0xc8c}, + {0xc8e, 0xc90}, {0xc92, 0xca8}, {0xcaa, 0xcb3}, {0xcb5, 0xcb9}, + {0xcbd, 0xcbd}, {0xcde, 0xcde}, {0xce0, 0xce1}, {0xd05, 0xd0c}, + {0xd0e, 0xd10}, {0xd12, 0xd28}, {0xd2a, 0xd39}, {0xd60, 0xd61}, + {0xd85, 0xd96}, {0xd9a, 0xdb1}, {0xdb3, 0xdbb}, {0xdbd, 0xdbd}, + {0xdc0, 0xdc6}, {0xe01, 0xe30}, {0xe32, 0xe33}, {0xe40, 0xe45}, + {0xe81, 0xe82}, {0xe84, 0xe84}, {0xe87, 0xe88}, {0xe8a, 0xe8a}, + {0xe8d, 0xe8d}, {0xe94, 0xe97}, {0xe99, 0xe9f}, {0xea1, 0xea3}, + {0xea5, 0xea5}, {0xea7, 0xea7}, {0xeaa, 0xeab}, {0xead, 0xeb0}, + {0xeb2, 0xeb3}, {0xebd, 0xebd}, {0xec0, 0xec4}, {0xedc, 0xedd}, + {0xf00, 0xf00}, {0xf40, 0xf47}, {0xf49, 0xf6a}, {0xf88, 0xf8b}, + {0x1000, 0x1021}, {0x1023, 0x1027}, {0x1029, 0x102a}, {0x1050, 0x1055}, + {0x10d0, 0x10f8}, {0x1100, 0x1159}, {0x115f, 0x11a2}, {0x11a8, 0x11f9}, + {0x1200, 0x1206}, {0x1208, 0x1246}, {0x1248, 0x1248}, {0x124a, 0x124d}, + {0x1250, 0x1256}, {0x1258, 0x1258}, {0x125a, 0x125d}, {0x1260, 0x1286}, + {0x1288, 0x1288}, {0x128a, 0x128d}, {0x1290, 0x12ae}, {0x12b0, 0x12b0}, + {0x12b2, 0x12b5}, {0x12b8, 0x12be}, {0x12c0, 0x12c0}, {0x12c2, 0x12c5}, + {0x12c8, 0x12ce}, {0x12d0, 0x12d6}, {0x12d8, 0x12ee}, {0x12f0, 0x130e}, + {0x1310, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x131e}, {0x1320, 0x1346}, + {0x1348, 0x135a}, {0x13a0, 0x13f4}, {0x1401, 0x166c}, {0x166f, 0x1676}, + {0x1681, 0x169a}, {0x16a0, 0x16ea}, {0x1700, 0x170c}, {0x170e, 0x1711}, + {0x1720, 0x1731}, {0x1740, 0x1751}, {0x1760, 0x176c}, {0x176e, 0x1770}, + {0x1780, 0x17b3}, {0x17dc, 0x17dc}, {0x1820, 0x1842}, {0x1844, 0x1877}, + {0x1880, 0x18a8}, {0x1900, 0x191c}, {0x1950, 0x196d}, {0x1970, 0x1974}, + {0x2135, 0x2138}, {0x3006, 0x3006}, {0x303c, 0x303c}, {0x3041, 0x3096}, + {0x309f, 0x309f}, {0x30a1, 0x30fa}, {0x30ff, 0x30ff}, {0x3105, 0x312c}, + {0x3131, 0x318e}, {0x31a0, 0x31b7}, {0x31f0, 0x31ff}, {0x3400, 0x3400}, + {0x4db5, 0x4db5}, {0x4e00, 0x4e00}, {0x9fa5, 0x9fa5}, {0xa000, 0xa48c}, + {0xac00, 0xac00}, {0xd7a3, 0xd7a3}, {0xf900, 0xfa2d}, {0xfa30, 0xfa6a}, + {0xfb1d, 0xfb1d}, {0xfb1f, 0xfb28}, {0xfb2a, 0xfb36}, {0xfb38, 0xfb3c}, + {0xfb3e, 0xfb3e}, {0xfb40, 0xfb41}, {0xfb43, 0xfb44}, {0xfb46, 0xfbb1}, + {0xfbd3, 0xfd3d}, {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb}, + {0xfe70, 0xfe74}, {0xfe76, 0xfefc}, {0xff66, 0xff6f}, {0xff71, 0xff9d}, + {0xffa0, 0xffbe}, {0xffc2, 0xffc7}, {0xffca, 0xffcf}, {0xffd2, 0xffd7}, + {0xffda, 0xffdc} }; +static const xmlChLRange xmlLoL[] = {{0x10000, 0x1000b}, {0x1000d, 0x10026}, + {0x10028, 0x1003a}, {0x1003c, 0x1003d}, {0x1003f, 0x1004d}, + {0x10050, 0x1005d}, {0x10080, 0x100fa}, {0x10300, 0x1031e}, + {0x10330, 0x10349}, {0x10380, 0x1039d}, {0x10450, 0x1049d}, + {0x10800, 0x10805}, {0x10808, 0x10808}, {0x1080a, 0x10835}, + {0x10837, 0x10838}, {0x1083c, 0x1083c}, {0x1083f, 0x1083f}, + {0x20000, 0x20000}, {0x2a6d6, 0x2a6d6}, {0x2f800, 0x2fa1d} }; +static xmlChRangeGroup xmlLoG = {211,20,xmlLoS,xmlLoL}; + +static const xmlChSRange xmlLtS[] = {{0x1c5, 0x1c5}, {0x1c8, 0x1c8}, + {0x1cb, 0x1cb}, {0x1f2, 0x1f2}, {0x1f88, 0x1f8f}, {0x1f98, 0x1f9f}, + {0x1fa8, 0x1faf}, {0x1fbc, 0x1fbc}, {0x1fcc, 0x1fcc}, {0x1ffc, 0x1ffc} }; +static xmlChRangeGroup xmlLtG = {10,0,xmlLtS,NULL}; + +static const xmlChSRange xmlLuS[] = {{0x41, 0x5a}, {0xc0, 0xd6}, + {0xd8, 0xde}, {0x100, 0x100}, {0x102, 0x102}, {0x104, 0x104}, + {0x106, 0x106}, {0x108, 0x108}, {0x10a, 0x10a}, {0x10c, 0x10c}, + {0x10e, 0x10e}, {0x110, 0x110}, {0x112, 0x112}, {0x114, 0x114}, + {0x116, 0x116}, {0x118, 0x118}, {0x11a, 0x11a}, {0x11c, 0x11c}, + {0x11e, 0x11e}, {0x120, 0x120}, {0x122, 0x122}, {0x124, 0x124}, + {0x126, 0x126}, {0x128, 0x128}, {0x12a, 0x12a}, {0x12c, 0x12c}, + {0x12e, 0x12e}, {0x130, 0x130}, {0x132, 0x132}, {0x134, 0x134}, + {0x136, 0x136}, {0x139, 0x139}, {0x13b, 0x13b}, {0x13d, 0x13d}, + {0x13f, 0x13f}, {0x141, 0x141}, {0x143, 0x143}, {0x145, 0x145}, + {0x147, 0x147}, {0x14a, 0x14a}, {0x14c, 0x14c}, {0x14e, 0x14e}, + {0x150, 0x150}, {0x152, 0x152}, {0x154, 0x154}, {0x156, 0x156}, + {0x158, 0x158}, {0x15a, 0x15a}, {0x15c, 0x15c}, {0x15e, 0x15e}, + {0x160, 0x160}, {0x162, 0x162}, {0x164, 0x164}, {0x166, 0x166}, + {0x168, 0x168}, {0x16a, 0x16a}, {0x16c, 0x16c}, {0x16e, 0x16e}, + {0x170, 0x170}, {0x172, 0x172}, {0x174, 0x174}, {0x176, 0x176}, + {0x178, 0x179}, {0x17b, 0x17b}, {0x17d, 0x17d}, {0x181, 0x182}, + {0x184, 0x184}, {0x186, 0x187}, {0x189, 0x18b}, {0x18e, 0x191}, + {0x193, 0x194}, {0x196, 0x198}, {0x19c, 0x19d}, {0x19f, 0x1a0}, + {0x1a2, 0x1a2}, {0x1a4, 0x1a4}, {0x1a6, 0x1a7}, {0x1a9, 0x1a9}, + {0x1ac, 0x1ac}, {0x1ae, 0x1af}, {0x1b1, 0x1b3}, {0x1b5, 0x1b5}, + {0x1b7, 0x1b8}, {0x1bc, 0x1bc}, {0x1c4, 0x1c4}, {0x1c7, 0x1c7}, + {0x1ca, 0x1ca}, {0x1cd, 0x1cd}, {0x1cf, 0x1cf}, {0x1d1, 0x1d1}, + {0x1d3, 0x1d3}, {0x1d5, 0x1d5}, {0x1d7, 0x1d7}, {0x1d9, 0x1d9}, + {0x1db, 0x1db}, {0x1de, 0x1de}, {0x1e0, 0x1e0}, {0x1e2, 0x1e2}, + {0x1e4, 0x1e4}, {0x1e6, 0x1e6}, {0x1e8, 0x1e8}, {0x1ea, 0x1ea}, + {0x1ec, 0x1ec}, {0x1ee, 0x1ee}, {0x1f1, 0x1f1}, {0x1f4, 0x1f4}, + {0x1f6, 0x1f8}, {0x1fa, 0x1fa}, {0x1fc, 0x1fc}, {0x1fe, 0x1fe}, + {0x200, 0x200}, {0x202, 0x202}, {0x204, 0x204}, {0x206, 0x206}, + {0x208, 0x208}, {0x20a, 0x20a}, {0x20c, 0x20c}, {0x20e, 0x20e}, + {0x210, 0x210}, {0x212, 0x212}, {0x214, 0x214}, {0x216, 0x216}, + {0x218, 0x218}, {0x21a, 0x21a}, {0x21c, 0x21c}, {0x21e, 0x21e}, + {0x220, 0x220}, {0x222, 0x222}, {0x224, 0x224}, {0x226, 0x226}, + {0x228, 0x228}, {0x22a, 0x22a}, {0x22c, 0x22c}, {0x22e, 0x22e}, + {0x230, 0x230}, {0x232, 0x232}, {0x386, 0x386}, {0x388, 0x38a}, + {0x38c, 0x38c}, {0x38e, 0x38f}, {0x391, 0x3a1}, {0x3a3, 0x3ab}, + {0x3d2, 0x3d4}, {0x3d8, 0x3d8}, {0x3da, 0x3da}, {0x3dc, 0x3dc}, + {0x3de, 0x3de}, {0x3e0, 0x3e0}, {0x3e2, 0x3e2}, {0x3e4, 0x3e4}, + {0x3e6, 0x3e6}, {0x3e8, 0x3e8}, {0x3ea, 0x3ea}, {0x3ec, 0x3ec}, + {0x3ee, 0x3ee}, {0x3f4, 0x3f4}, {0x3f7, 0x3f7}, {0x3f9, 0x3fa}, + {0x400, 0x42f}, {0x460, 0x460}, {0x462, 0x462}, {0x464, 0x464}, + {0x466, 0x466}, {0x468, 0x468}, {0x46a, 0x46a}, {0x46c, 0x46c}, + {0x46e, 0x46e}, {0x470, 0x470}, {0x472, 0x472}, {0x474, 0x474}, + {0x476, 0x476}, {0x478, 0x478}, {0x47a, 0x47a}, {0x47c, 0x47c}, + {0x47e, 0x47e}, {0x480, 0x480}, {0x48a, 0x48a}, {0x48c, 0x48c}, + {0x48e, 0x48e}, {0x490, 0x490}, {0x492, 0x492}, {0x494, 0x494}, + {0x496, 0x496}, {0x498, 0x498}, {0x49a, 0x49a}, {0x49c, 0x49c}, + {0x49e, 0x49e}, {0x4a0, 0x4a0}, {0x4a2, 0x4a2}, {0x4a4, 0x4a4}, + {0x4a6, 0x4a6}, {0x4a8, 0x4a8}, {0x4aa, 0x4aa}, {0x4ac, 0x4ac}, + {0x4ae, 0x4ae}, {0x4b0, 0x4b0}, {0x4b2, 0x4b2}, {0x4b4, 0x4b4}, + {0x4b6, 0x4b6}, {0x4b8, 0x4b8}, {0x4ba, 0x4ba}, {0x4bc, 0x4bc}, + {0x4be, 0x4be}, {0x4c0, 0x4c1}, {0x4c3, 0x4c3}, {0x4c5, 0x4c5}, + {0x4c7, 0x4c7}, {0x4c9, 0x4c9}, {0x4cb, 0x4cb}, {0x4cd, 0x4cd}, + {0x4d0, 0x4d0}, {0x4d2, 0x4d2}, {0x4d4, 0x4d4}, {0x4d6, 0x4d6}, + {0x4d8, 0x4d8}, {0x4da, 0x4da}, {0x4dc, 0x4dc}, {0x4de, 0x4de}, + {0x4e0, 0x4e0}, {0x4e2, 0x4e2}, {0x4e4, 0x4e4}, {0x4e6, 0x4e6}, + {0x4e8, 0x4e8}, {0x4ea, 0x4ea}, {0x4ec, 0x4ec}, {0x4ee, 0x4ee}, + {0x4f0, 0x4f0}, {0x4f2, 0x4f2}, {0x4f4, 0x4f4}, {0x4f8, 0x4f8}, + {0x500, 0x500}, {0x502, 0x502}, {0x504, 0x504}, {0x506, 0x506}, + {0x508, 0x508}, {0x50a, 0x50a}, {0x50c, 0x50c}, {0x50e, 0x50e}, + {0x531, 0x556}, {0x10a0, 0x10c5}, {0x1e00, 0x1e00}, {0x1e02, 0x1e02}, + {0x1e04, 0x1e04}, {0x1e06, 0x1e06}, {0x1e08, 0x1e08}, {0x1e0a, 0x1e0a}, + {0x1e0c, 0x1e0c}, {0x1e0e, 0x1e0e}, {0x1e10, 0x1e10}, {0x1e12, 0x1e12}, + {0x1e14, 0x1e14}, {0x1e16, 0x1e16}, {0x1e18, 0x1e18}, {0x1e1a, 0x1e1a}, + {0x1e1c, 0x1e1c}, {0x1e1e, 0x1e1e}, {0x1e20, 0x1e20}, {0x1e22, 0x1e22}, + {0x1e24, 0x1e24}, {0x1e26, 0x1e26}, {0x1e28, 0x1e28}, {0x1e2a, 0x1e2a}, + {0x1e2c, 0x1e2c}, {0x1e2e, 0x1e2e}, {0x1e30, 0x1e30}, {0x1e32, 0x1e32}, + {0x1e34, 0x1e34}, {0x1e36, 0x1e36}, {0x1e38, 0x1e38}, {0x1e3a, 0x1e3a}, + {0x1e3c, 0x1e3c}, {0x1e3e, 0x1e3e}, {0x1e40, 0x1e40}, {0x1e42, 0x1e42}, + {0x1e44, 0x1e44}, {0x1e46, 0x1e46}, {0x1e48, 0x1e48}, {0x1e4a, 0x1e4a}, + {0x1e4c, 0x1e4c}, {0x1e4e, 0x1e4e}, {0x1e50, 0x1e50}, {0x1e52, 0x1e52}, + {0x1e54, 0x1e54}, {0x1e56, 0x1e56}, {0x1e58, 0x1e58}, {0x1e5a, 0x1e5a}, + {0x1e5c, 0x1e5c}, {0x1e5e, 0x1e5e}, {0x1e60, 0x1e60}, {0x1e62, 0x1e62}, + {0x1e64, 0x1e64}, {0x1e66, 0x1e66}, {0x1e68, 0x1e68}, {0x1e6a, 0x1e6a}, + {0x1e6c, 0x1e6c}, {0x1e6e, 0x1e6e}, {0x1e70, 0x1e70}, {0x1e72, 0x1e72}, + {0x1e74, 0x1e74}, {0x1e76, 0x1e76}, {0x1e78, 0x1e78}, {0x1e7a, 0x1e7a}, + {0x1e7c, 0x1e7c}, {0x1e7e, 0x1e7e}, {0x1e80, 0x1e80}, {0x1e82, 0x1e82}, + {0x1e84, 0x1e84}, {0x1e86, 0x1e86}, {0x1e88, 0x1e88}, {0x1e8a, 0x1e8a}, + {0x1e8c, 0x1e8c}, {0x1e8e, 0x1e8e}, {0x1e90, 0x1e90}, {0x1e92, 0x1e92}, + {0x1e94, 0x1e94}, {0x1ea0, 0x1ea0}, {0x1ea2, 0x1ea2}, {0x1ea4, 0x1ea4}, + {0x1ea6, 0x1ea6}, {0x1ea8, 0x1ea8}, {0x1eaa, 0x1eaa}, {0x1eac, 0x1eac}, + {0x1eae, 0x1eae}, {0x1eb0, 0x1eb0}, {0x1eb2, 0x1eb2}, {0x1eb4, 0x1eb4}, + {0x1eb6, 0x1eb6}, {0x1eb8, 0x1eb8}, {0x1eba, 0x1eba}, {0x1ebc, 0x1ebc}, + {0x1ebe, 0x1ebe}, {0x1ec0, 0x1ec0}, {0x1ec2, 0x1ec2}, {0x1ec4, 0x1ec4}, + {0x1ec6, 0x1ec6}, {0x1ec8, 0x1ec8}, {0x1eca, 0x1eca}, {0x1ecc, 0x1ecc}, + {0x1ece, 0x1ece}, {0x1ed0, 0x1ed0}, {0x1ed2, 0x1ed2}, {0x1ed4, 0x1ed4}, + {0x1ed6, 0x1ed6}, {0x1ed8, 0x1ed8}, {0x1eda, 0x1eda}, {0x1edc, 0x1edc}, + {0x1ede, 0x1ede}, {0x1ee0, 0x1ee0}, {0x1ee2, 0x1ee2}, {0x1ee4, 0x1ee4}, + {0x1ee6, 0x1ee6}, {0x1ee8, 0x1ee8}, {0x1eea, 0x1eea}, {0x1eec, 0x1eec}, + {0x1eee, 0x1eee}, {0x1ef0, 0x1ef0}, {0x1ef2, 0x1ef2}, {0x1ef4, 0x1ef4}, + {0x1ef6, 0x1ef6}, {0x1ef8, 0x1ef8}, {0x1f08, 0x1f0f}, {0x1f18, 0x1f1d}, + {0x1f28, 0x1f2f}, {0x1f38, 0x1f3f}, {0x1f48, 0x1f4d}, {0x1f59, 0x1f59}, + {0x1f5b, 0x1f5b}, {0x1f5d, 0x1f5d}, {0x1f5f, 0x1f5f}, {0x1f68, 0x1f6f}, + {0x1fb8, 0x1fbb}, {0x1fc8, 0x1fcb}, {0x1fd8, 0x1fdb}, {0x1fe8, 0x1fec}, + {0x1ff8, 0x1ffb}, {0x2102, 0x2102}, {0x2107, 0x2107}, {0x210b, 0x210d}, + {0x2110, 0x2112}, {0x2115, 0x2115}, {0x2119, 0x211d}, {0x2124, 0x2124}, + {0x2126, 0x2126}, {0x2128, 0x2128}, {0x212a, 0x212d}, {0x2130, 0x2131}, + {0x2133, 0x2133}, {0x213e, 0x213f}, {0x2145, 0x2145}, {0xff21, 0xff3a} }; +static const xmlChLRange xmlLuL[] = {{0x10400, 0x10427}, {0x1d400, 0x1d419}, + {0x1d434, 0x1d44d}, {0x1d468, 0x1d481}, {0x1d49c, 0x1d49c}, + {0x1d49e, 0x1d49f}, {0x1d4a2, 0x1d4a2}, {0x1d4a5, 0x1d4a6}, + {0x1d4a9, 0x1d4ac}, {0x1d4ae, 0x1d4b5}, {0x1d4d0, 0x1d4e9}, + {0x1d504, 0x1d505}, {0x1d507, 0x1d50a}, {0x1d50d, 0x1d514}, + {0x1d516, 0x1d51c}, {0x1d538, 0x1d539}, {0x1d53b, 0x1d53e}, + {0x1d540, 0x1d544}, {0x1d546, 0x1d546}, {0x1d54a, 0x1d550}, + {0x1d56c, 0x1d585}, {0x1d5a0, 0x1d5b9}, {0x1d5d4, 0x1d5ed}, + {0x1d608, 0x1d621}, {0x1d63c, 0x1d655}, {0x1d670, 0x1d689}, + {0x1d6a8, 0x1d6c0}, {0x1d6e2, 0x1d6fa}, {0x1d71c, 0x1d734}, + {0x1d756, 0x1d76e}, {0x1d790, 0x1d7a8} }; +static xmlChRangeGroup xmlLuG = {390,31,xmlLuS,xmlLuL}; + +static const xmlChSRange xmlMS[] = {{0x300, 0x357}, {0x35d, 0x36f}, + {0x483, 0x486}, {0x488, 0x489}, {0x591, 0x5a1}, {0x5a3, 0x5b9}, + {0x5bb, 0x5bd}, {0x5bf, 0x5bf}, {0x5c1, 0x5c2}, {0x5c4, 0x5c4}, + {0x610, 0x615}, {0x64b, 0x658}, {0x670, 0x670}, {0x6d6, 0x6dc}, + {0x6de, 0x6e4}, {0x6e7, 0x6e8}, {0x6ea, 0x6ed}, {0x711, 0x711}, + {0x730, 0x74a}, {0x7a6, 0x7b0}, {0x901, 0x903}, {0x93c, 0x93c}, + {0x93e, 0x94d}, {0x951, 0x954}, {0x962, 0x963}, {0x981, 0x983}, + {0x9bc, 0x9bc}, {0x9be, 0x9c4}, {0x9c7, 0x9c8}, {0x9cb, 0x9cd}, + {0x9d7, 0x9d7}, {0x9e2, 0x9e3}, {0xa01, 0xa03}, {0xa3c, 0xa3c}, + {0xa3e, 0xa42}, {0xa47, 0xa48}, {0xa4b, 0xa4d}, {0xa70, 0xa71}, + {0xa81, 0xa83}, {0xabc, 0xabc}, {0xabe, 0xac5}, {0xac7, 0xac9}, + {0xacb, 0xacd}, {0xae2, 0xae3}, {0xb01, 0xb03}, {0xb3c, 0xb3c}, + {0xb3e, 0xb43}, {0xb47, 0xb48}, {0xb4b, 0xb4d}, {0xb56, 0xb57}, + {0xb82, 0xb82}, {0xbbe, 0xbc2}, {0xbc6, 0xbc8}, {0xbca, 0xbcd}, + {0xbd7, 0xbd7}, {0xc01, 0xc03}, {0xc3e, 0xc44}, {0xc46, 0xc48}, + {0xc4a, 0xc4d}, {0xc55, 0xc56}, {0xc82, 0xc83}, {0xcbc, 0xcbc}, + {0xcbe, 0xcc4}, {0xcc6, 0xcc8}, {0xcca, 0xccd}, {0xcd5, 0xcd6}, + {0xd02, 0xd03}, {0xd3e, 0xd43}, {0xd46, 0xd48}, {0xd4a, 0xd4d}, + {0xd57, 0xd57}, {0xd82, 0xd83}, {0xdca, 0xdca}, {0xdcf, 0xdd4}, + {0xdd6, 0xdd6}, {0xdd8, 0xddf}, {0xdf2, 0xdf3}, {0xe31, 0xe31}, + {0xe34, 0xe3a}, {0xe47, 0xe4e}, {0xeb1, 0xeb1}, {0xeb4, 0xeb9}, + {0xebb, 0xebc}, {0xec8, 0xecd}, {0xf18, 0xf19}, {0xf35, 0xf35}, + {0xf37, 0xf37}, {0xf39, 0xf39}, {0xf3e, 0xf3f}, {0xf71, 0xf84}, + {0xf86, 0xf87}, {0xf90, 0xf97}, {0xf99, 0xfbc}, {0xfc6, 0xfc6}, + {0x102c, 0x1032}, {0x1036, 0x1039}, {0x1056, 0x1059}, {0x1712, 0x1714}, + {0x1732, 0x1734}, {0x1752, 0x1753}, {0x1772, 0x1773}, {0x17b6, 0x17d3}, + {0x17dd, 0x17dd}, {0x180b, 0x180d}, {0x18a9, 0x18a9}, {0x1920, 0x192b}, + {0x1930, 0x193b}, {0x20d0, 0x20ea}, {0x302a, 0x302f}, {0x3099, 0x309a}, + {0xfb1e, 0xfb1e}, {0xfe00, 0xfe0f}, {0xfe20, 0xfe23} }; +static const xmlChLRange xmlML[] = {{0x1d165, 0x1d169}, {0x1d16d, 0x1d172}, + {0x1d17b, 0x1d182}, {0x1d185, 0x1d18b}, {0x1d1aa, 0x1d1ad}, + {0xe0100, 0xe01ef} }; +static xmlChRangeGroup xmlMG = {113,6,xmlMS,xmlML}; + +static const xmlChSRange xmlMcS[] = {{0x903, 0x903}, {0x93e, 0x940}, + {0x949, 0x94c}, {0x982, 0x983}, {0x9be, 0x9c0}, {0x9c7, 0x9c8}, + {0x9cb, 0x9cc}, {0x9d7, 0x9d7}, {0xa03, 0xa03}, {0xa3e, 0xa40}, + {0xa83, 0xa83}, {0xabe, 0xac0}, {0xac9, 0xac9}, {0xacb, 0xacc}, + {0xb02, 0xb03}, {0xb3e, 0xb3e}, {0xb40, 0xb40}, {0xb47, 0xb48}, + {0xb4b, 0xb4c}, {0xb57, 0xb57}, {0xbbe, 0xbbf}, {0xbc1, 0xbc2}, + {0xbc6, 0xbc8}, {0xbca, 0xbcc}, {0xbd7, 0xbd7}, {0xc01, 0xc03}, + {0xc41, 0xc44}, {0xc82, 0xc83}, {0xcbe, 0xcbe}, {0xcc0, 0xcc4}, + {0xcc7, 0xcc8}, {0xcca, 0xccb}, {0xcd5, 0xcd6}, {0xd02, 0xd03}, + {0xd3e, 0xd40}, {0xd46, 0xd48}, {0xd4a, 0xd4c}, {0xd57, 0xd57}, + {0xd82, 0xd83}, {0xdcf, 0xdd1}, {0xdd8, 0xddf}, {0xdf2, 0xdf3}, + {0xf3e, 0xf3f}, {0xf7f, 0xf7f}, {0x102c, 0x102c}, {0x1031, 0x1031}, + {0x1038, 0x1038}, {0x1056, 0x1057}, {0x17b6, 0x17b6}, {0x17be, 0x17c5}, + {0x17c7, 0x17c8}, {0x1923, 0x1926}, {0x1929, 0x192b}, {0x1930, 0x1931}, + {0x1933, 0x1938} }; +static const xmlChLRange xmlMcL[] = {{0x1d165, 0x1d166}, {0x1d16d, 0x1d172} }; +static xmlChRangeGroup xmlMcG = {55,2,xmlMcS,xmlMcL}; + +static const xmlChSRange xmlMnS[] = {{0x300, 0x357}, {0x35d, 0x36f}, + {0x483, 0x486}, {0x591, 0x5a1}, {0x5a3, 0x5b9}, {0x5bb, 0x5bd}, + {0x5bf, 0x5bf}, {0x5c1, 0x5c2}, {0x5c4, 0x5c4}, {0x610, 0x615}, + {0x64b, 0x658}, {0x670, 0x670}, {0x6d6, 0x6dc}, {0x6df, 0x6e4}, + {0x6e7, 0x6e8}, {0x6ea, 0x6ed}, {0x711, 0x711}, {0x730, 0x74a}, + {0x7a6, 0x7b0}, {0x901, 0x902}, {0x93c, 0x93c}, {0x941, 0x948}, + {0x94d, 0x94d}, {0x951, 0x954}, {0x962, 0x963}, {0x981, 0x981}, + {0x9bc, 0x9bc}, {0x9c1, 0x9c4}, {0x9cd, 0x9cd}, {0x9e2, 0x9e3}, + {0xa01, 0xa02}, {0xa3c, 0xa3c}, {0xa41, 0xa42}, {0xa47, 0xa48}, + {0xa4b, 0xa4d}, {0xa70, 0xa71}, {0xa81, 0xa82}, {0xabc, 0xabc}, + {0xac1, 0xac5}, {0xac7, 0xac8}, {0xacd, 0xacd}, {0xae2, 0xae3}, + {0xb01, 0xb01}, {0xb3c, 0xb3c}, {0xb3f, 0xb3f}, {0xb41, 0xb43}, + {0xb4d, 0xb4d}, {0xb56, 0xb56}, {0xb82, 0xb82}, {0xbc0, 0xbc0}, + {0xbcd, 0xbcd}, {0xc3e, 0xc40}, {0xc46, 0xc48}, {0xc4a, 0xc4d}, + {0xc55, 0xc56}, {0xcbc, 0xcbc}, {0xcbf, 0xcbf}, {0xcc6, 0xcc6}, + {0xccc, 0xccd}, {0xd41, 0xd43}, {0xd4d, 0xd4d}, {0xdca, 0xdca}, + {0xdd2, 0xdd4}, {0xdd6, 0xdd6}, {0xe31, 0xe31}, {0xe34, 0xe3a}, + {0xe47, 0xe4e}, {0xeb1, 0xeb1}, {0xeb4, 0xeb9}, {0xebb, 0xebc}, + {0xec8, 0xecd}, {0xf18, 0xf19}, {0xf35, 0xf35}, {0xf37, 0xf37}, + {0xf39, 0xf39}, {0xf71, 0xf7e}, {0xf80, 0xf84}, {0xf86, 0xf87}, + {0xf90, 0xf97}, {0xf99, 0xfbc}, {0xfc6, 0xfc6}, {0x102d, 0x1030}, + {0x1032, 0x1032}, {0x1036, 0x1037}, {0x1039, 0x1039}, {0x1058, 0x1059}, + {0x1712, 0x1714}, {0x1732, 0x1734}, {0x1752, 0x1753}, {0x1772, 0x1773}, + {0x17b7, 0x17bd}, {0x17c6, 0x17c6}, {0x17c9, 0x17d3}, {0x17dd, 0x17dd}, + {0x180b, 0x180d}, {0x18a9, 0x18a9}, {0x1920, 0x1922}, {0x1927, 0x1928}, + {0x1932, 0x1932}, {0x1939, 0x193b}, {0x20d0, 0x20dc}, {0x20e1, 0x20e1}, + {0x20e5, 0x20ea}, {0x302a, 0x302f}, {0x3099, 0x309a}, {0xfb1e, 0xfb1e}, + {0xfe00, 0xfe0f}, {0xfe20, 0xfe23} }; +static const xmlChLRange xmlMnL[] = {{0x1d167, 0x1d169}, {0x1d17b, 0x1d182}, + {0x1d185, 0x1d18b}, {0x1d1aa, 0x1d1ad}, {0xe0100, 0xe01ef} }; +static xmlChRangeGroup xmlMnG = {108,5,xmlMnS,xmlMnL}; + +static const xmlChSRange xmlNS[] = {{0x30, 0x39}, {0xb2, 0xb3}, + {0xb9, 0xb9}, {0xbc, 0xbe}, {0x660, 0x669}, {0x6f0, 0x6f9}, + {0x966, 0x96f}, {0x9e6, 0x9ef}, {0x9f4, 0x9f9}, {0xa66, 0xa6f}, + {0xae6, 0xaef}, {0xb66, 0xb6f}, {0xbe7, 0xbf2}, {0xc66, 0xc6f}, + {0xce6, 0xcef}, {0xd66, 0xd6f}, {0xe50, 0xe59}, {0xed0, 0xed9}, + {0xf20, 0xf33}, {0x1040, 0x1049}, {0x1369, 0x137c}, {0x16ee, 0x16f0}, + {0x17e0, 0x17e9}, {0x17f0, 0x17f9}, {0x1810, 0x1819}, {0x1946, 0x194f}, + {0x2070, 0x2070}, {0x2074, 0x2079}, {0x2080, 0x2089}, {0x2153, 0x2183}, + {0x2460, 0x249b}, {0x24ea, 0x24ff}, {0x2776, 0x2793}, {0x3007, 0x3007}, + {0x3021, 0x3029}, {0x3038, 0x303a}, {0x3192, 0x3195}, {0x3220, 0x3229}, + {0x3251, 0x325f}, {0x3280, 0x3289}, {0x32b1, 0x32bf}, {0xff10, 0xff19} }; +static const xmlChLRange xmlNL[] = {{0x10107, 0x10133}, {0x10320, 0x10323}, + {0x1034a, 0x1034a}, {0x104a0, 0x104a9}, {0x1d7ce, 0x1d7ff} }; +static xmlChRangeGroup xmlNG = {42,5,xmlNS,xmlNL}; + +static const xmlChSRange xmlNdS[] = {{0x30, 0x39}, {0x660, 0x669}, + {0x6f0, 0x6f9}, {0x966, 0x96f}, {0x9e6, 0x9ef}, {0xa66, 0xa6f}, + {0xae6, 0xaef}, {0xb66, 0xb6f}, {0xbe7, 0xbef}, {0xc66, 0xc6f}, + {0xce6, 0xcef}, {0xd66, 0xd6f}, {0xe50, 0xe59}, {0xed0, 0xed9}, + {0xf20, 0xf29}, {0x1040, 0x1049}, {0x1369, 0x1371}, {0x17e0, 0x17e9}, + {0x1810, 0x1819}, {0x1946, 0x194f}, {0xff10, 0xff19} }; +static const xmlChLRange xmlNdL[] = {{0x104a0, 0x104a9}, {0x1d7ce, 0x1d7ff} }; +static xmlChRangeGroup xmlNdG = {21,2,xmlNdS,xmlNdL}; + +static const xmlChSRange xmlNoS[] = {{0xb2, 0xb3}, {0xb9, 0xb9}, + {0xbc, 0xbe}, {0x9f4, 0x9f9}, {0xbf0, 0xbf2}, {0xf2a, 0xf33}, + {0x1372, 0x137c}, {0x17f0, 0x17f9}, {0x2070, 0x2070}, {0x2074, 0x2079}, + {0x2080, 0x2089}, {0x2153, 0x215f}, {0x2460, 0x249b}, {0x24ea, 0x24ff}, + {0x2776, 0x2793}, {0x3192, 0x3195}, {0x3220, 0x3229}, {0x3251, 0x325f}, + {0x3280, 0x3289}, {0x32b1, 0x32bf} }; +static const xmlChLRange xmlNoL[] = {{0x10107, 0x10133}, {0x10320, 0x10323} }; +static xmlChRangeGroup xmlNoG = {20,2,xmlNoS,xmlNoL}; + +static const xmlChSRange xmlPS[] = {{0x21, 0x23}, {0x25, 0x2a}, + {0x2c, 0x2f}, {0x3a, 0x3b}, {0x3f, 0x40}, {0x5b, 0x5d}, {0x5f, 0x5f}, + {0x7b, 0x7b}, {0x7d, 0x7d}, {0xa1, 0xa1}, {0xab, 0xab}, {0xb7, 0xb7}, + {0xbb, 0xbb}, {0xbf, 0xbf}, {0x37e, 0x37e}, {0x387, 0x387}, + {0x55a, 0x55f}, {0x589, 0x58a}, {0x5be, 0x5be}, {0x5c0, 0x5c0}, + {0x5c3, 0x5c3}, {0x5f3, 0x5f4}, {0x60c, 0x60d}, {0x61b, 0x61b}, + {0x61f, 0x61f}, {0x66a, 0x66d}, {0x6d4, 0x6d4}, {0x700, 0x70d}, + {0x964, 0x965}, {0x970, 0x970}, {0xdf4, 0xdf4}, {0xe4f, 0xe4f}, + {0xe5a, 0xe5b}, {0xf04, 0xf12}, {0xf3a, 0xf3d}, {0xf85, 0xf85}, + {0x104a, 0x104f}, {0x10fb, 0x10fb}, {0x1361, 0x1368}, {0x166d, 0x166e}, + {0x169b, 0x169c}, {0x16eb, 0x16ed}, {0x1735, 0x1736}, {0x17d4, 0x17d6}, + {0x17d8, 0x17da}, {0x1800, 0x180a}, {0x1944, 0x1945}, {0x2010, 0x2027}, + {0x2030, 0x2043}, {0x2045, 0x2051}, {0x2053, 0x2054}, {0x2057, 0x2057}, + {0x207d, 0x207e}, {0x208d, 0x208e}, {0x2329, 0x232a}, {0x23b4, 0x23b6}, + {0x2768, 0x2775}, {0x27e6, 0x27eb}, {0x2983, 0x2998}, {0x29d8, 0x29db}, + {0x29fc, 0x29fd}, {0x3001, 0x3003}, {0x3008, 0x3011}, {0x3014, 0x301f}, + {0x3030, 0x3030}, {0x303d, 0x303d}, {0x30a0, 0x30a0}, {0x30fb, 0x30fb}, + {0xfd3e, 0xfd3f}, {0xfe30, 0xfe52}, {0xfe54, 0xfe61}, {0xfe63, 0xfe63}, + {0xfe68, 0xfe68}, {0xfe6a, 0xfe6b}, {0xff01, 0xff03}, {0xff05, 0xff0a}, + {0xff0c, 0xff0f}, {0xff1a, 0xff1b}, {0xff1f, 0xff20}, {0xff3b, 0xff3d}, + {0xff3f, 0xff3f}, {0xff5b, 0xff5b}, {0xff5d, 0xff5d}, {0xff5f, 0xff65} }; +static const xmlChLRange xmlPL[] = {{0x10100, 0x10101}, {0x1039f, 0x1039f} }; +static xmlChRangeGroup xmlPG = {84,2,xmlPS,xmlPL}; + +static const xmlChSRange xmlPdS[] = {{0x2d, 0x2d}, {0x58a, 0x58a}, + {0x1806, 0x1806}, {0x2010, 0x2015}, {0x301c, 0x301c}, {0x3030, 0x3030}, + {0x30a0, 0x30a0}, {0xfe31, 0xfe32}, {0xfe58, 0xfe58}, {0xfe63, 0xfe63}, + {0xff0d, 0xff0d} }; +static xmlChRangeGroup xmlPdG = {11,0,xmlPdS,NULL}; + +static const xmlChSRange xmlPeS[] = {{0x29, 0x29}, {0x5d, 0x5d}, + {0x7d, 0x7d}, {0xf3b, 0xf3b}, {0xf3d, 0xf3d}, {0x169c, 0x169c}, + {0x2046, 0x2046}, {0x207e, 0x207e}, {0x208e, 0x208e}, {0x232a, 0x232a}, + {0x23b5, 0x23b5}, {0x2769, 0x2769}, {0x276b, 0x276b}, {0x276d, 0x276d}, + {0x276f, 0x276f}, {0x2771, 0x2771}, {0x2773, 0x2773}, {0x2775, 0x2775}, + {0x27e7, 0x27e7}, {0x27e9, 0x27e9}, {0x27eb, 0x27eb}, {0x2984, 0x2984}, + {0x2986, 0x2986}, {0x2988, 0x2988}, {0x298a, 0x298a}, {0x298c, 0x298c}, + {0x298e, 0x298e}, {0x2990, 0x2990}, {0x2992, 0x2992}, {0x2994, 0x2994}, + {0x2996, 0x2996}, {0x2998, 0x2998}, {0x29d9, 0x29d9}, {0x29db, 0x29db}, + {0x29fd, 0x29fd}, {0x3009, 0x3009}, {0x300b, 0x300b}, {0x300d, 0x300d}, + {0x300f, 0x300f}, {0x3011, 0x3011}, {0x3015, 0x3015}, {0x3017, 0x3017}, + {0x3019, 0x3019}, {0x301b, 0x301b}, {0x301e, 0x301f}, {0xfd3f, 0xfd3f}, + {0xfe36, 0xfe36}, {0xfe38, 0xfe38}, {0xfe3a, 0xfe3a}, {0xfe3c, 0xfe3c}, + {0xfe3e, 0xfe3e}, {0xfe40, 0xfe40}, {0xfe42, 0xfe42}, {0xfe44, 0xfe44}, + {0xfe48, 0xfe48}, {0xfe5a, 0xfe5a}, {0xfe5c, 0xfe5c}, {0xfe5e, 0xfe5e}, + {0xff09, 0xff09}, {0xff3d, 0xff3d}, {0xff5d, 0xff5d}, {0xff60, 0xff60}, + {0xff63, 0xff63} }; +static xmlChRangeGroup xmlPeG = {63,0,xmlPeS,NULL}; + +static const xmlChSRange xmlPoS[] = {{0x21, 0x23}, {0x25, 0x27}, + {0x2a, 0x2a}, {0x2c, 0x2c}, {0x2e, 0x2f}, {0x3a, 0x3b}, {0x3f, 0x40}, + {0x5c, 0x5c}, {0xa1, 0xa1}, {0xb7, 0xb7}, {0xbf, 0xbf}, {0x37e, 0x37e}, + {0x387, 0x387}, {0x55a, 0x55f}, {0x589, 0x589}, {0x5be, 0x5be}, + {0x5c0, 0x5c0}, {0x5c3, 0x5c3}, {0x5f3, 0x5f4}, {0x60c, 0x60d}, + {0x61b, 0x61b}, {0x61f, 0x61f}, {0x66a, 0x66d}, {0x6d4, 0x6d4}, + {0x700, 0x70d}, {0x964, 0x965}, {0x970, 0x970}, {0xdf4, 0xdf4}, + {0xe4f, 0xe4f}, {0xe5a, 0xe5b}, {0xf04, 0xf12}, {0xf85, 0xf85}, + {0x104a, 0x104f}, {0x10fb, 0x10fb}, {0x1361, 0x1368}, {0x166d, 0x166e}, + {0x16eb, 0x16ed}, {0x1735, 0x1736}, {0x17d4, 0x17d6}, {0x17d8, 0x17da}, + {0x1800, 0x1805}, {0x1807, 0x180a}, {0x1944, 0x1945}, {0x2016, 0x2017}, + {0x2020, 0x2027}, {0x2030, 0x2038}, {0x203b, 0x203e}, {0x2041, 0x2043}, + {0x2047, 0x2051}, {0x2053, 0x2053}, {0x2057, 0x2057}, {0x23b6, 0x23b6}, + {0x3001, 0x3003}, {0x303d, 0x303d}, {0xfe30, 0xfe30}, {0xfe45, 0xfe46}, + {0xfe49, 0xfe4c}, {0xfe50, 0xfe52}, {0xfe54, 0xfe57}, {0xfe5f, 0xfe61}, + {0xfe68, 0xfe68}, {0xfe6a, 0xfe6b}, {0xff01, 0xff03}, {0xff05, 0xff07}, + {0xff0a, 0xff0a}, {0xff0c, 0xff0c}, {0xff0e, 0xff0f}, {0xff1a, 0xff1b}, + {0xff1f, 0xff20}, {0xff3c, 0xff3c}, {0xff61, 0xff61}, {0xff64, 0xff64} }; +static const xmlChLRange xmlPoL[] = {{0x10100, 0x10101}, {0x1039f, 0x1039f} }; +static xmlChRangeGroup xmlPoG = {72,2,xmlPoS,xmlPoL}; + +static const xmlChSRange xmlPsS[] = {{0x28, 0x28}, {0x5b, 0x5b}, + {0x7b, 0x7b}, {0xf3a, 0xf3a}, {0xf3c, 0xf3c}, {0x169b, 0x169b}, + {0x201a, 0x201a}, {0x201e, 0x201e}, {0x2045, 0x2045}, {0x207d, 0x207d}, + {0x208d, 0x208d}, {0x2329, 0x2329}, {0x23b4, 0x23b4}, {0x2768, 0x2768}, + {0x276a, 0x276a}, {0x276c, 0x276c}, {0x276e, 0x276e}, {0x2770, 0x2770}, + {0x2772, 0x2772}, {0x2774, 0x2774}, {0x27e6, 0x27e6}, {0x27e8, 0x27e8}, + {0x27ea, 0x27ea}, {0x2983, 0x2983}, {0x2985, 0x2985}, {0x2987, 0x2987}, + {0x2989, 0x2989}, {0x298b, 0x298b}, {0x298d, 0x298d}, {0x298f, 0x298f}, + {0x2991, 0x2991}, {0x2993, 0x2993}, {0x2995, 0x2995}, {0x2997, 0x2997}, + {0x29d8, 0x29d8}, {0x29da, 0x29da}, {0x29fc, 0x29fc}, {0x3008, 0x3008}, + {0x300a, 0x300a}, {0x300c, 0x300c}, {0x300e, 0x300e}, {0x3010, 0x3010}, + {0x3014, 0x3014}, {0x3016, 0x3016}, {0x3018, 0x3018}, {0x301a, 0x301a}, + {0x301d, 0x301d}, {0xfd3e, 0xfd3e}, {0xfe35, 0xfe35}, {0xfe37, 0xfe37}, + {0xfe39, 0xfe39}, {0xfe3b, 0xfe3b}, {0xfe3d, 0xfe3d}, {0xfe3f, 0xfe3f}, + {0xfe41, 0xfe41}, {0xfe43, 0xfe43}, {0xfe47, 0xfe47}, {0xfe59, 0xfe59}, + {0xfe5b, 0xfe5b}, {0xfe5d, 0xfe5d}, {0xff08, 0xff08}, {0xff3b, 0xff3b}, + {0xff5b, 0xff5b}, {0xff5f, 0xff5f}, {0xff62, 0xff62} }; +static xmlChRangeGroup xmlPsG = {65,0,xmlPsS,NULL}; + +static const xmlChSRange xmlSS[] = {{0x24, 0x24}, {0x2b, 0x2b}, + {0x3c, 0x3e}, {0x5e, 0x5e}, {0x60, 0x60}, {0x7c, 0x7c}, {0x7e, 0x7e}, + {0xa2, 0xa9}, {0xac, 0xac}, {0xae, 0xb1}, {0xb4, 0xb4}, {0xb6, 0xb6}, + {0xb8, 0xb8}, {0xd7, 0xd7}, {0xf7, 0xf7}, {0x2c2, 0x2c5}, + {0x2d2, 0x2df}, {0x2e5, 0x2ed}, {0x2ef, 0x2ff}, {0x374, 0x375}, + {0x384, 0x385}, {0x3f6, 0x3f6}, {0x482, 0x482}, {0x60e, 0x60f}, + {0x6e9, 0x6e9}, {0x6fd, 0x6fe}, {0x9f2, 0x9f3}, {0x9fa, 0x9fa}, + {0xaf1, 0xaf1}, {0xb70, 0xb70}, {0xbf3, 0xbfa}, {0xe3f, 0xe3f}, + {0xf01, 0xf03}, {0xf13, 0xf17}, {0xf1a, 0xf1f}, {0xf34, 0xf34}, + {0xf36, 0xf36}, {0xf38, 0xf38}, {0xfbe, 0xfc5}, {0xfc7, 0xfcc}, + {0xfcf, 0xfcf}, {0x17db, 0x17db}, {0x1940, 0x1940}, {0x19e0, 0x19ff}, + {0x1fbd, 0x1fbd}, {0x1fbf, 0x1fc1}, {0x1fcd, 0x1fcf}, {0x1fdd, 0x1fdf}, + {0x1fed, 0x1fef}, {0x1ffd, 0x1ffe}, {0x2044, 0x2044}, {0x2052, 0x2052}, + {0x207a, 0x207c}, {0x208a, 0x208c}, {0x20a0, 0x20b1}, {0x2100, 0x2101}, + {0x2103, 0x2106}, {0x2108, 0x2109}, {0x2114, 0x2114}, {0x2116, 0x2118}, + {0x211e, 0x2123}, {0x2125, 0x2125}, {0x2127, 0x2127}, {0x2129, 0x2129}, + {0x212e, 0x212e}, {0x2132, 0x2132}, {0x213a, 0x213b}, {0x2140, 0x2144}, + {0x214a, 0x214b}, {0x2190, 0x2328}, {0x232b, 0x23b3}, {0x23b7, 0x23d0}, + {0x2400, 0x2426}, {0x2440, 0x244a}, {0x249c, 0x24e9}, {0x2500, 0x2617}, + {0x2619, 0x267d}, {0x2680, 0x2691}, {0x26a0, 0x26a1}, {0x2701, 0x2704}, + {0x2706, 0x2709}, {0x270c, 0x2727}, {0x2729, 0x274b}, {0x274d, 0x274d}, + {0x274f, 0x2752}, {0x2756, 0x2756}, {0x2758, 0x275e}, {0x2761, 0x2767}, + {0x2794, 0x2794}, {0x2798, 0x27af}, {0x27b1, 0x27be}, {0x27d0, 0x27e5}, + {0x27f0, 0x2982}, {0x2999, 0x29d7}, {0x29dc, 0x29fb}, {0x29fe, 0x2b0d}, + {0x2e80, 0x2e99}, {0x2e9b, 0x2ef3}, {0x2f00, 0x2fd5}, {0x2ff0, 0x2ffb}, + {0x3004, 0x3004}, {0x3012, 0x3013}, {0x3020, 0x3020}, {0x3036, 0x3037}, + {0x303e, 0x303f}, {0x309b, 0x309c}, {0x3190, 0x3191}, {0x3196, 0x319f}, + {0x3200, 0x321e}, {0x322a, 0x3243}, {0x3250, 0x3250}, {0x3260, 0x327d}, + {0x327f, 0x327f}, {0x328a, 0x32b0}, {0x32c0, 0x32fe}, {0x3300, 0x33ff}, + {0x4dc0, 0x4dff}, {0xa490, 0xa4c6}, {0xfb29, 0xfb29}, {0xfdfc, 0xfdfd}, + {0xfe62, 0xfe62}, {0xfe64, 0xfe66}, {0xfe69, 0xfe69}, {0xff04, 0xff04}, + {0xff0b, 0xff0b}, {0xff1c, 0xff1e}, {0xff3e, 0xff3e}, {0xff40, 0xff40}, + {0xff5c, 0xff5c}, {0xff5e, 0xff5e}, {0xffe0, 0xffe6}, {0xffe8, 0xffee}, + {0xfffc, 0xfffd} }; +static const xmlChLRange xmlSL[] = {{0x10102, 0x10102}, {0x10137, 0x1013f}, + {0x1d000, 0x1d0f5}, {0x1d100, 0x1d126}, {0x1d12a, 0x1d164}, + {0x1d16a, 0x1d16c}, {0x1d183, 0x1d184}, {0x1d18c, 0x1d1a9}, + {0x1d1ae, 0x1d1dd}, {0x1d300, 0x1d356}, {0x1d6c1, 0x1d6c1}, + {0x1d6db, 0x1d6db}, {0x1d6fb, 0x1d6fb}, {0x1d715, 0x1d715}, + {0x1d735, 0x1d735}, {0x1d74f, 0x1d74f}, {0x1d76f, 0x1d76f}, + {0x1d789, 0x1d789}, {0x1d7a9, 0x1d7a9}, {0x1d7c3, 0x1d7c3} }; +static xmlChRangeGroup xmlSG = {133,20,xmlSS,xmlSL}; + +static const xmlChSRange xmlScS[] = {{0x24, 0x24}, {0xa2, 0xa5}, + {0x9f2, 0x9f3}, {0xaf1, 0xaf1}, {0xbf9, 0xbf9}, {0xe3f, 0xe3f}, + {0x17db, 0x17db}, {0x20a0, 0x20b1}, {0xfdfc, 0xfdfc}, {0xfe69, 0xfe69}, + {0xff04, 0xff04}, {0xffe0, 0xffe1}, {0xffe5, 0xffe6} }; +static xmlChRangeGroup xmlScG = {13,0,xmlScS,NULL}; + +static const xmlChSRange xmlSkS[] = {{0x5e, 0x5e}, {0x60, 0x60}, + {0xa8, 0xa8}, {0xaf, 0xaf}, {0xb4, 0xb4}, {0xb8, 0xb8}, {0x2c2, 0x2c5}, + {0x2d2, 0x2df}, {0x2e5, 0x2ed}, {0x2ef, 0x2ff}, {0x374, 0x375}, + {0x384, 0x385}, {0x1fbd, 0x1fbd}, {0x1fbf, 0x1fc1}, {0x1fcd, 0x1fcf}, + {0x1fdd, 0x1fdf}, {0x1fed, 0x1fef}, {0x1ffd, 0x1ffe}, {0x309b, 0x309c}, + {0xff3e, 0xff3e}, {0xff40, 0xff40}, {0xffe3, 0xffe3} }; +static xmlChRangeGroup xmlSkG = {22,0,xmlSkS,NULL}; + +static const xmlChSRange xmlSmS[] = {{0x2b, 0x2b}, {0x3c, 0x3e}, + {0x7c, 0x7c}, {0x7e, 0x7e}, {0xac, 0xac}, {0xb1, 0xb1}, {0xd7, 0xd7}, + {0xf7, 0xf7}, {0x3f6, 0x3f6}, {0x2044, 0x2044}, {0x2052, 0x2052}, + {0x207a, 0x207c}, {0x208a, 0x208c}, {0x2140, 0x2144}, {0x214b, 0x214b}, + {0x2190, 0x2194}, {0x219a, 0x219b}, {0x21a0, 0x21a0}, {0x21a3, 0x21a3}, + {0x21a6, 0x21a6}, {0x21ae, 0x21ae}, {0x21ce, 0x21cf}, {0x21d2, 0x21d2}, + {0x21d4, 0x21d4}, {0x21f4, 0x22ff}, {0x2308, 0x230b}, {0x2320, 0x2321}, + {0x237c, 0x237c}, {0x239b, 0x23b3}, {0x25b7, 0x25b7}, {0x25c1, 0x25c1}, + {0x25f8, 0x25ff}, {0x266f, 0x266f}, {0x27d0, 0x27e5}, {0x27f0, 0x27ff}, + {0x2900, 0x2982}, {0x2999, 0x29d7}, {0x29dc, 0x29fb}, {0x29fe, 0x2aff}, + {0xfb29, 0xfb29}, {0xfe62, 0xfe62}, {0xfe64, 0xfe66}, {0xff0b, 0xff0b}, + {0xff1c, 0xff1e}, {0xff5c, 0xff5c}, {0xff5e, 0xff5e}, {0xffe2, 0xffe2}, + {0xffe9, 0xffec} }; +static const xmlChLRange xmlSmL[] = {{0x1d6c1, 0x1d6c1}, {0x1d6db, 0x1d6db}, + {0x1d6fb, 0x1d6fb}, {0x1d715, 0x1d715}, {0x1d735, 0x1d735}, + {0x1d74f, 0x1d74f}, {0x1d76f, 0x1d76f}, {0x1d789, 0x1d789}, + {0x1d7a9, 0x1d7a9}, {0x1d7c3, 0x1d7c3} }; +static xmlChRangeGroup xmlSmG = {48,10,xmlSmS,xmlSmL}; + +static const xmlChSRange xmlSoS[] = {{0xa6, 0xa7}, {0xa9, 0xa9}, + {0xae, 0xae}, {0xb0, 0xb0}, {0xb6, 0xb6}, {0x482, 0x482}, + {0x60e, 0x60f}, {0x6e9, 0x6e9}, {0x6fd, 0x6fe}, {0x9fa, 0x9fa}, + {0xb70, 0xb70}, {0xbf3, 0xbf8}, {0xbfa, 0xbfa}, {0xf01, 0xf03}, + {0xf13, 0xf17}, {0xf1a, 0xf1f}, {0xf34, 0xf34}, {0xf36, 0xf36}, + {0xf38, 0xf38}, {0xfbe, 0xfc5}, {0xfc7, 0xfcc}, {0xfcf, 0xfcf}, + {0x1940, 0x1940}, {0x19e0, 0x19ff}, {0x2100, 0x2101}, {0x2103, 0x2106}, + {0x2108, 0x2109}, {0x2114, 0x2114}, {0x2116, 0x2118}, {0x211e, 0x2123}, + {0x2125, 0x2125}, {0x2127, 0x2127}, {0x2129, 0x2129}, {0x212e, 0x212e}, + {0x2132, 0x2132}, {0x213a, 0x213b}, {0x214a, 0x214a}, {0x2195, 0x2199}, + {0x219c, 0x219f}, {0x21a1, 0x21a2}, {0x21a4, 0x21a5}, {0x21a7, 0x21ad}, + {0x21af, 0x21cd}, {0x21d0, 0x21d1}, {0x21d3, 0x21d3}, {0x21d5, 0x21f3}, + {0x2300, 0x2307}, {0x230c, 0x231f}, {0x2322, 0x2328}, {0x232b, 0x237b}, + {0x237d, 0x239a}, {0x23b7, 0x23d0}, {0x2400, 0x2426}, {0x2440, 0x244a}, + {0x249c, 0x24e9}, {0x2500, 0x25b6}, {0x25b8, 0x25c0}, {0x25c2, 0x25f7}, + {0x2600, 0x2617}, {0x2619, 0x266e}, {0x2670, 0x267d}, {0x2680, 0x2691}, + {0x26a0, 0x26a1}, {0x2701, 0x2704}, {0x2706, 0x2709}, {0x270c, 0x2727}, + {0x2729, 0x274b}, {0x274d, 0x274d}, {0x274f, 0x2752}, {0x2756, 0x2756}, + {0x2758, 0x275e}, {0x2761, 0x2767}, {0x2794, 0x2794}, {0x2798, 0x27af}, + {0x27b1, 0x27be}, {0x2800, 0x28ff}, {0x2b00, 0x2b0d}, {0x2e80, 0x2e99}, + {0x2e9b, 0x2ef3}, {0x2f00, 0x2fd5}, {0x2ff0, 0x2ffb}, {0x3004, 0x3004}, + {0x3012, 0x3013}, {0x3020, 0x3020}, {0x3036, 0x3037}, {0x303e, 0x303f}, + {0x3190, 0x3191}, {0x3196, 0x319f}, {0x3200, 0x321e}, {0x322a, 0x3243}, + {0x3250, 0x3250}, {0x3260, 0x327d}, {0x327f, 0x327f}, {0x328a, 0x32b0}, + {0x32c0, 0x32fe}, {0x3300, 0x33ff}, {0x4dc0, 0x4dff}, {0xa490, 0xa4c6}, + {0xfdfd, 0xfdfd}, {0xffe4, 0xffe4}, {0xffe8, 0xffe8}, {0xffed, 0xffee}, + {0xfffc, 0xfffd} }; +static const xmlChLRange xmlSoL[] = {{0x10102, 0x10102}, {0x10137, 0x1013f}, + {0x1d000, 0x1d0f5}, {0x1d100, 0x1d126}, {0x1d12a, 0x1d164}, + {0x1d16a, 0x1d16c}, {0x1d183, 0x1d184}, {0x1d18c, 0x1d1a9}, + {0x1d1ae, 0x1d1dd}, {0x1d300, 0x1d356} }; +static xmlChRangeGroup xmlSoG = {103,10,xmlSoS,xmlSoL}; + +static const xmlChSRange xmlZS[] = {{0x20, 0x20}, {0xa0, 0xa0}, + {0x1680, 0x1680}, {0x180e, 0x180e}, {0x2000, 0x200a}, {0x2028, 0x2029}, + {0x202f, 0x202f}, {0x205f, 0x205f}, {0x3000, 0x3000} }; +static xmlChRangeGroup xmlZG = {9,0,xmlZS,NULL}; + +static xmlUnicodeNameTable xmlUnicodeBlockTbl = {xmlUnicodeBlocks, 128}; +static xmlUnicodeNameTable xmlUnicodeCatTbl = {xmlUnicodeCats, 36}; + +/** + * xmlUnicodeLookup: + * @tptr: pointer to the name table + * @name: name to be found + * + * binary table lookup for user-supplied name + * + * Returns pointer to range function if found, otherwise NULL + */ +static xmlIntFunc +*xmlUnicodeLookup(xmlUnicodeNameTable *tptr, const char *tname) { + int low, high, mid, cmp; + xmlUnicodeRange *sptr; + + if ((tptr == NULL) || (tname == NULL)) return(NULL); + + low = 0; + high = tptr->numentries - 1; + sptr = tptr->table; + while (low <= high) { + mid = (low + high) / 2; + if ((cmp=strcmp(tname, sptr[mid].rangename)) == 0) + return (sptr[mid].func); + if (cmp < 0) + high = mid - 1; + else + low = mid + 1; + } + return (NULL); +} + +/** + * xmlUCSIsAegeanNumbers: + * @code: UCS code point + * + * Check whether the character is part of AegeanNumbers UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsAegeanNumbers(int code) { + return(((code >= 0x10100) && (code <= 0x1013F))); +} + +/** + * xmlUCSIsAlphabeticPresentationForms: + * @code: UCS code point + * + * Check whether the character is part of AlphabeticPresentationForms UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsAlphabeticPresentationForms(int code) { + return(((code >= 0xFB00) && (code <= 0xFB4F))); +} + +/** + * xmlUCSIsArabic: + * @code: UCS code point + * + * Check whether the character is part of Arabic UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsArabic(int code) { + return(((code >= 0x0600) && (code <= 0x06FF))); +} + +/** + * xmlUCSIsArabicPresentationFormsA: + * @code: UCS code point + * + * Check whether the character is part of ArabicPresentationForms-A UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsArabicPresentationFormsA(int code) { + return(((code >= 0xFB50) && (code <= 0xFDFF))); +} + +/** + * xmlUCSIsArabicPresentationFormsB: + * @code: UCS code point + * + * Check whether the character is part of ArabicPresentationForms-B UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsArabicPresentationFormsB(int code) { + return(((code >= 0xFE70) && (code <= 0xFEFF))); +} + +/** + * xmlUCSIsArmenian: + * @code: UCS code point + * + * Check whether the character is part of Armenian UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsArmenian(int code) { + return(((code >= 0x0530) && (code <= 0x058F))); +} + +/** + * xmlUCSIsArrows: + * @code: UCS code point + * + * Check whether the character is part of Arrows UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsArrows(int code) { + return(((code >= 0x2190) && (code <= 0x21FF))); +} + +/** + * xmlUCSIsBasicLatin: + * @code: UCS code point + * + * Check whether the character is part of BasicLatin UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsBasicLatin(int code) { + return(((code >= 0x0000) && (code <= 0x007F))); +} + +/** + * xmlUCSIsBengali: + * @code: UCS code point + * + * Check whether the character is part of Bengali UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsBengali(int code) { + return(((code >= 0x0980) && (code <= 0x09FF))); +} + +/** + * xmlUCSIsBlockElements: + * @code: UCS code point + * + * Check whether the character is part of BlockElements UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsBlockElements(int code) { + return(((code >= 0x2580) && (code <= 0x259F))); +} + +/** + * xmlUCSIsBopomofo: + * @code: UCS code point + * + * Check whether the character is part of Bopomofo UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsBopomofo(int code) { + return(((code >= 0x3100) && (code <= 0x312F))); +} + +/** + * xmlUCSIsBopomofoExtended: + * @code: UCS code point + * + * Check whether the character is part of BopomofoExtended UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsBopomofoExtended(int code) { + return(((code >= 0x31A0) && (code <= 0x31BF))); +} + +/** + * xmlUCSIsBoxDrawing: + * @code: UCS code point + * + * Check whether the character is part of BoxDrawing UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsBoxDrawing(int code) { + return(((code >= 0x2500) && (code <= 0x257F))); +} + +/** + * xmlUCSIsBraillePatterns: + * @code: UCS code point + * + * Check whether the character is part of BraillePatterns UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsBraillePatterns(int code) { + return(((code >= 0x2800) && (code <= 0x28FF))); +} + +/** + * xmlUCSIsBuhid: + * @code: UCS code point + * + * Check whether the character is part of Buhid UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsBuhid(int code) { + return(((code >= 0x1740) && (code <= 0x175F))); +} + +/** + * xmlUCSIsByzantineMusicalSymbols: + * @code: UCS code point + * + * Check whether the character is part of ByzantineMusicalSymbols UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsByzantineMusicalSymbols(int code) { + return(((code >= 0x1D000) && (code <= 0x1D0FF))); +} + +/** + * xmlUCSIsCJKCompatibility: + * @code: UCS code point + * + * Check whether the character is part of CJKCompatibility UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCJKCompatibility(int code) { + return(((code >= 0x3300) && (code <= 0x33FF))); +} + +/** + * xmlUCSIsCJKCompatibilityForms: + * @code: UCS code point + * + * Check whether the character is part of CJKCompatibilityForms UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCJKCompatibilityForms(int code) { + return(((code >= 0xFE30) && (code <= 0xFE4F))); +} + +/** + * xmlUCSIsCJKCompatibilityIdeographs: + * @code: UCS code point + * + * Check whether the character is part of CJKCompatibilityIdeographs UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCJKCompatibilityIdeographs(int code) { + return(((code >= 0xF900) && (code <= 0xFAFF))); +} + +/** + * xmlUCSIsCJKCompatibilityIdeographsSupplement: + * @code: UCS code point + * + * Check whether the character is part of CJKCompatibilityIdeographsSupplement UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCJKCompatibilityIdeographsSupplement(int code) { + return(((code >= 0x2F800) && (code <= 0x2FA1F))); +} + +/** + * xmlUCSIsCJKRadicalsSupplement: + * @code: UCS code point + * + * Check whether the character is part of CJKRadicalsSupplement UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCJKRadicalsSupplement(int code) { + return(((code >= 0x2E80) && (code <= 0x2EFF))); +} + +/** + * xmlUCSIsCJKSymbolsandPunctuation: + * @code: UCS code point + * + * Check whether the character is part of CJKSymbolsandPunctuation UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCJKSymbolsandPunctuation(int code) { + return(((code >= 0x3000) && (code <= 0x303F))); +} + +/** + * xmlUCSIsCJKUnifiedIdeographs: + * @code: UCS code point + * + * Check whether the character is part of CJKUnifiedIdeographs UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCJKUnifiedIdeographs(int code) { + return(((code >= 0x4E00) && (code <= 0x9FFF))); +} + +/** + * xmlUCSIsCJKUnifiedIdeographsExtensionA: + * @code: UCS code point + * + * Check whether the character is part of CJKUnifiedIdeographsExtensionA UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCJKUnifiedIdeographsExtensionA(int code) { + return(((code >= 0x3400) && (code <= 0x4DBF))); +} + +/** + * xmlUCSIsCJKUnifiedIdeographsExtensionB: + * @code: UCS code point + * + * Check whether the character is part of CJKUnifiedIdeographsExtensionB UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCJKUnifiedIdeographsExtensionB(int code) { + return(((code >= 0x20000) && (code <= 0x2A6DF))); +} + +/** + * xmlUCSIsCherokee: + * @code: UCS code point + * + * Check whether the character is part of Cherokee UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCherokee(int code) { + return(((code >= 0x13A0) && (code <= 0x13FF))); +} + +/** + * xmlUCSIsCombiningDiacriticalMarks: + * @code: UCS code point + * + * Check whether the character is part of CombiningDiacriticalMarks UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCombiningDiacriticalMarks(int code) { + return(((code >= 0x0300) && (code <= 0x036F))); +} + +/** + * xmlUCSIsCombiningDiacriticalMarksforSymbols: + * @code: UCS code point + * + * Check whether the character is part of CombiningDiacriticalMarksforSymbols UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCombiningDiacriticalMarksforSymbols(int code) { + return(((code >= 0x20D0) && (code <= 0x20FF))); +} + +/** + * xmlUCSIsCombiningHalfMarks: + * @code: UCS code point + * + * Check whether the character is part of CombiningHalfMarks UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCombiningHalfMarks(int code) { + return(((code >= 0xFE20) && (code <= 0xFE2F))); +} + +/** + * xmlUCSIsCombiningMarksforSymbols: + * @code: UCS code point + * + * Check whether the character is part of CombiningMarksforSymbols UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCombiningMarksforSymbols(int code) { + return(((code >= 0x20D0) && (code <= 0x20FF))); +} + +/** + * xmlUCSIsControlPictures: + * @code: UCS code point + * + * Check whether the character is part of ControlPictures UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsControlPictures(int code) { + return(((code >= 0x2400) && (code <= 0x243F))); +} + +/** + * xmlUCSIsCurrencySymbols: + * @code: UCS code point + * + * Check whether the character is part of CurrencySymbols UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCurrencySymbols(int code) { + return(((code >= 0x20A0) && (code <= 0x20CF))); +} + +/** + * xmlUCSIsCypriotSyllabary: + * @code: UCS code point + * + * Check whether the character is part of CypriotSyllabary UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCypriotSyllabary(int code) { + return(((code >= 0x10800) && (code <= 0x1083F))); +} + +/** + * xmlUCSIsCyrillic: + * @code: UCS code point + * + * Check whether the character is part of Cyrillic UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCyrillic(int code) { + return(((code >= 0x0400) && (code <= 0x04FF))); +} + +/** + * xmlUCSIsCyrillicSupplement: + * @code: UCS code point + * + * Check whether the character is part of CyrillicSupplement UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCyrillicSupplement(int code) { + return(((code >= 0x0500) && (code <= 0x052F))); +} + +/** + * xmlUCSIsDeseret: + * @code: UCS code point + * + * Check whether the character is part of Deseret UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsDeseret(int code) { + return(((code >= 0x10400) && (code <= 0x1044F))); +} + +/** + * xmlUCSIsDevanagari: + * @code: UCS code point + * + * Check whether the character is part of Devanagari UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsDevanagari(int code) { + return(((code >= 0x0900) && (code <= 0x097F))); +} + +/** + * xmlUCSIsDingbats: + * @code: UCS code point + * + * Check whether the character is part of Dingbats UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsDingbats(int code) { + return(((code >= 0x2700) && (code <= 0x27BF))); +} + +/** + * xmlUCSIsEnclosedAlphanumerics: + * @code: UCS code point + * + * Check whether the character is part of EnclosedAlphanumerics UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsEnclosedAlphanumerics(int code) { + return(((code >= 0x2460) && (code <= 0x24FF))); +} + +/** + * xmlUCSIsEnclosedCJKLettersandMonths: + * @code: UCS code point + * + * Check whether the character is part of EnclosedCJKLettersandMonths UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsEnclosedCJKLettersandMonths(int code) { + return(((code >= 0x3200) && (code <= 0x32FF))); +} + +/** + * xmlUCSIsEthiopic: + * @code: UCS code point + * + * Check whether the character is part of Ethiopic UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsEthiopic(int code) { + return(((code >= 0x1200) && (code <= 0x137F))); +} + +/** + * xmlUCSIsGeneralPunctuation: + * @code: UCS code point + * + * Check whether the character is part of GeneralPunctuation UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsGeneralPunctuation(int code) { + return(((code >= 0x2000) && (code <= 0x206F))); +} + +/** + * xmlUCSIsGeometricShapes: + * @code: UCS code point + * + * Check whether the character is part of GeometricShapes UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsGeometricShapes(int code) { + return(((code >= 0x25A0) && (code <= 0x25FF))); +} + +/** + * xmlUCSIsGeorgian: + * @code: UCS code point + * + * Check whether the character is part of Georgian UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsGeorgian(int code) { + return(((code >= 0x10A0) && (code <= 0x10FF))); +} + +/** + * xmlUCSIsGothic: + * @code: UCS code point + * + * Check whether the character is part of Gothic UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsGothic(int code) { + return(((code >= 0x10330) && (code <= 0x1034F))); +} + +/** + * xmlUCSIsGreek: + * @code: UCS code point + * + * Check whether the character is part of Greek UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsGreek(int code) { + return(((code >= 0x0370) && (code <= 0x03FF))); +} + +/** + * xmlUCSIsGreekExtended: + * @code: UCS code point + * + * Check whether the character is part of GreekExtended UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsGreekExtended(int code) { + return(((code >= 0x1F00) && (code <= 0x1FFF))); +} + +/** + * xmlUCSIsGreekandCoptic: + * @code: UCS code point + * + * Check whether the character is part of GreekandCoptic UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsGreekandCoptic(int code) { + return(((code >= 0x0370) && (code <= 0x03FF))); +} + +/** + * xmlUCSIsGujarati: + * @code: UCS code point + * + * Check whether the character is part of Gujarati UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsGujarati(int code) { + return(((code >= 0x0A80) && (code <= 0x0AFF))); +} + +/** + * xmlUCSIsGurmukhi: + * @code: UCS code point + * + * Check whether the character is part of Gurmukhi UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsGurmukhi(int code) { + return(((code >= 0x0A00) && (code <= 0x0A7F))); +} + +/** + * xmlUCSIsHalfwidthandFullwidthForms: + * @code: UCS code point + * + * Check whether the character is part of HalfwidthandFullwidthForms UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsHalfwidthandFullwidthForms(int code) { + return(((code >= 0xFF00) && (code <= 0xFFEF))); +} + +/** + * xmlUCSIsHangulCompatibilityJamo: + * @code: UCS code point + * + * Check whether the character is part of HangulCompatibilityJamo UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsHangulCompatibilityJamo(int code) { + return(((code >= 0x3130) && (code <= 0x318F))); +} + +/** + * xmlUCSIsHangulJamo: + * @code: UCS code point + * + * Check whether the character is part of HangulJamo UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsHangulJamo(int code) { + return(((code >= 0x1100) && (code <= 0x11FF))); +} + +/** + * xmlUCSIsHangulSyllables: + * @code: UCS code point + * + * Check whether the character is part of HangulSyllables UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsHangulSyllables(int code) { + return(((code >= 0xAC00) && (code <= 0xD7AF))); +} + +/** + * xmlUCSIsHanunoo: + * @code: UCS code point + * + * Check whether the character is part of Hanunoo UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsHanunoo(int code) { + return(((code >= 0x1720) && (code <= 0x173F))); +} + +/** + * xmlUCSIsHebrew: + * @code: UCS code point + * + * Check whether the character is part of Hebrew UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsHebrew(int code) { + return(((code >= 0x0590) && (code <= 0x05FF))); +} + +/** + * xmlUCSIsHighPrivateUseSurrogates: + * @code: UCS code point + * + * Check whether the character is part of HighPrivateUseSurrogates UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsHighPrivateUseSurrogates(int code) { + return(((code >= 0xDB80) && (code <= 0xDBFF))); +} + +/** + * xmlUCSIsHighSurrogates: + * @code: UCS code point + * + * Check whether the character is part of HighSurrogates UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsHighSurrogates(int code) { + return(((code >= 0xD800) && (code <= 0xDB7F))); +} + +/** + * xmlUCSIsHiragana: + * @code: UCS code point + * + * Check whether the character is part of Hiragana UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsHiragana(int code) { + return(((code >= 0x3040) && (code <= 0x309F))); +} + +/** + * xmlUCSIsIPAExtensions: + * @code: UCS code point + * + * Check whether the character is part of IPAExtensions UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsIPAExtensions(int code) { + return(((code >= 0x0250) && (code <= 0x02AF))); +} + +/** + * xmlUCSIsIdeographicDescriptionCharacters: + * @code: UCS code point + * + * Check whether the character is part of IdeographicDescriptionCharacters UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsIdeographicDescriptionCharacters(int code) { + return(((code >= 0x2FF0) && (code <= 0x2FFF))); +} + +/** + * xmlUCSIsKanbun: + * @code: UCS code point + * + * Check whether the character is part of Kanbun UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsKanbun(int code) { + return(((code >= 0x3190) && (code <= 0x319F))); +} + +/** + * xmlUCSIsKangxiRadicals: + * @code: UCS code point + * + * Check whether the character is part of KangxiRadicals UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsKangxiRadicals(int code) { + return(((code >= 0x2F00) && (code <= 0x2FDF))); +} + +/** + * xmlUCSIsKannada: + * @code: UCS code point + * + * Check whether the character is part of Kannada UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsKannada(int code) { + return(((code >= 0x0C80) && (code <= 0x0CFF))); +} + +/** + * xmlUCSIsKatakana: + * @code: UCS code point + * + * Check whether the character is part of Katakana UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsKatakana(int code) { + return(((code >= 0x30A0) && (code <= 0x30FF))); +} + +/** + * xmlUCSIsKatakanaPhoneticExtensions: + * @code: UCS code point + * + * Check whether the character is part of KatakanaPhoneticExtensions UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsKatakanaPhoneticExtensions(int code) { + return(((code >= 0x31F0) && (code <= 0x31FF))); +} + +/** + * xmlUCSIsKhmer: + * @code: UCS code point + * + * Check whether the character is part of Khmer UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsKhmer(int code) { + return(((code >= 0x1780) && (code <= 0x17FF))); +} + +/** + * xmlUCSIsKhmerSymbols: + * @code: UCS code point + * + * Check whether the character is part of KhmerSymbols UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsKhmerSymbols(int code) { + return(((code >= 0x19E0) && (code <= 0x19FF))); +} + +/** + * xmlUCSIsLao: + * @code: UCS code point + * + * Check whether the character is part of Lao UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsLao(int code) { + return(((code >= 0x0E80) && (code <= 0x0EFF))); +} + +/** + * xmlUCSIsLatin1Supplement: + * @code: UCS code point + * + * Check whether the character is part of Latin-1Supplement UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsLatin1Supplement(int code) { + return(((code >= 0x0080) && (code <= 0x00FF))); +} + +/** + * xmlUCSIsLatinExtendedA: + * @code: UCS code point + * + * Check whether the character is part of LatinExtended-A UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsLatinExtendedA(int code) { + return(((code >= 0x0100) && (code <= 0x017F))); +} + +/** + * xmlUCSIsLatinExtendedB: + * @code: UCS code point + * + * Check whether the character is part of LatinExtended-B UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsLatinExtendedB(int code) { + return(((code >= 0x0180) && (code <= 0x024F))); +} + +/** + * xmlUCSIsLatinExtendedAdditional: + * @code: UCS code point + * + * Check whether the character is part of LatinExtendedAdditional UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsLatinExtendedAdditional(int code) { + return(((code >= 0x1E00) && (code <= 0x1EFF))); +} + +/** + * xmlUCSIsLetterlikeSymbols: + * @code: UCS code point + * + * Check whether the character is part of LetterlikeSymbols UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsLetterlikeSymbols(int code) { + return(((code >= 0x2100) && (code <= 0x214F))); +} + +/** + * xmlUCSIsLimbu: + * @code: UCS code point + * + * Check whether the character is part of Limbu UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsLimbu(int code) { + return(((code >= 0x1900) && (code <= 0x194F))); +} + +/** + * xmlUCSIsLinearBIdeograms: + * @code: UCS code point + * + * Check whether the character is part of LinearBIdeograms UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsLinearBIdeograms(int code) { + return(((code >= 0x10080) && (code <= 0x100FF))); +} + +/** + * xmlUCSIsLinearBSyllabary: + * @code: UCS code point + * + * Check whether the character is part of LinearBSyllabary UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsLinearBSyllabary(int code) { + return(((code >= 0x10000) && (code <= 0x1007F))); +} + +/** + * xmlUCSIsLowSurrogates: + * @code: UCS code point + * + * Check whether the character is part of LowSurrogates UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsLowSurrogates(int code) { + return(((code >= 0xDC00) && (code <= 0xDFFF))); +} + +/** + * xmlUCSIsMalayalam: + * @code: UCS code point + * + * Check whether the character is part of Malayalam UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsMalayalam(int code) { + return(((code >= 0x0D00) && (code <= 0x0D7F))); +} + +/** + * xmlUCSIsMathematicalAlphanumericSymbols: + * @code: UCS code point + * + * Check whether the character is part of MathematicalAlphanumericSymbols UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsMathematicalAlphanumericSymbols(int code) { + return(((code >= 0x1D400) && (code <= 0x1D7FF))); +} + +/** + * xmlUCSIsMathematicalOperators: + * @code: UCS code point + * + * Check whether the character is part of MathematicalOperators UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsMathematicalOperators(int code) { + return(((code >= 0x2200) && (code <= 0x22FF))); +} + +/** + * xmlUCSIsMiscellaneousMathematicalSymbolsA: + * @code: UCS code point + * + * Check whether the character is part of MiscellaneousMathematicalSymbols-A UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsMiscellaneousMathematicalSymbolsA(int code) { + return(((code >= 0x27C0) && (code <= 0x27EF))); +} + +/** + * xmlUCSIsMiscellaneousMathematicalSymbolsB: + * @code: UCS code point + * + * Check whether the character is part of MiscellaneousMathematicalSymbols-B UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsMiscellaneousMathematicalSymbolsB(int code) { + return(((code >= 0x2980) && (code <= 0x29FF))); +} + +/** + * xmlUCSIsMiscellaneousSymbols: + * @code: UCS code point + * + * Check whether the character is part of MiscellaneousSymbols UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsMiscellaneousSymbols(int code) { + return(((code >= 0x2600) && (code <= 0x26FF))); +} + +/** + * xmlUCSIsMiscellaneousSymbolsandArrows: + * @code: UCS code point + * + * Check whether the character is part of MiscellaneousSymbolsandArrows UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsMiscellaneousSymbolsandArrows(int code) { + return(((code >= 0x2B00) && (code <= 0x2BFF))); +} + +/** + * xmlUCSIsMiscellaneousTechnical: + * @code: UCS code point + * + * Check whether the character is part of MiscellaneousTechnical UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsMiscellaneousTechnical(int code) { + return(((code >= 0x2300) && (code <= 0x23FF))); +} + +/** + * xmlUCSIsMongolian: + * @code: UCS code point + * + * Check whether the character is part of Mongolian UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsMongolian(int code) { + return(((code >= 0x1800) && (code <= 0x18AF))); +} + +/** + * xmlUCSIsMusicalSymbols: + * @code: UCS code point + * + * Check whether the character is part of MusicalSymbols UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsMusicalSymbols(int code) { + return(((code >= 0x1D100) && (code <= 0x1D1FF))); +} + +/** + * xmlUCSIsMyanmar: + * @code: UCS code point + * + * Check whether the character is part of Myanmar UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsMyanmar(int code) { + return(((code >= 0x1000) && (code <= 0x109F))); +} + +/** + * xmlUCSIsNumberForms: + * @code: UCS code point + * + * Check whether the character is part of NumberForms UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsNumberForms(int code) { + return(((code >= 0x2150) && (code <= 0x218F))); +} + +/** + * xmlUCSIsOgham: + * @code: UCS code point + * + * Check whether the character is part of Ogham UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsOgham(int code) { + return(((code >= 0x1680) && (code <= 0x169F))); +} + +/** + * xmlUCSIsOldItalic: + * @code: UCS code point + * + * Check whether the character is part of OldItalic UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsOldItalic(int code) { + return(((code >= 0x10300) && (code <= 0x1032F))); +} + +/** + * xmlUCSIsOpticalCharacterRecognition: + * @code: UCS code point + * + * Check whether the character is part of OpticalCharacterRecognition UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsOpticalCharacterRecognition(int code) { + return(((code >= 0x2440) && (code <= 0x245F))); +} + +/** + * xmlUCSIsOriya: + * @code: UCS code point + * + * Check whether the character is part of Oriya UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsOriya(int code) { + return(((code >= 0x0B00) && (code <= 0x0B7F))); +} + +/** + * xmlUCSIsOsmanya: + * @code: UCS code point + * + * Check whether the character is part of Osmanya UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsOsmanya(int code) { + return(((code >= 0x10480) && (code <= 0x104AF))); +} + +/** + * xmlUCSIsPhoneticExtensions: + * @code: UCS code point + * + * Check whether the character is part of PhoneticExtensions UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsPhoneticExtensions(int code) { + return(((code >= 0x1D00) && (code <= 0x1D7F))); +} + +/** + * xmlUCSIsPrivateUse: + * @code: UCS code point + * + * Check whether the character is part of PrivateUse UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsPrivateUse(int code) { + return(((code >= 0xE000) && (code <= 0xF8FF)) || + ((code >= 0xF0000) && (code <= 0xFFFFF)) || + ((code >= 0x100000) && (code <= 0x10FFFF))); +} + +/** + * xmlUCSIsPrivateUseArea: + * @code: UCS code point + * + * Check whether the character is part of PrivateUseArea UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsPrivateUseArea(int code) { + return(((code >= 0xE000) && (code <= 0xF8FF))); +} + +/** + * xmlUCSIsRunic: + * @code: UCS code point + * + * Check whether the character is part of Runic UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsRunic(int code) { + return(((code >= 0x16A0) && (code <= 0x16FF))); +} + +/** + * xmlUCSIsShavian: + * @code: UCS code point + * + * Check whether the character is part of Shavian UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsShavian(int code) { + return(((code >= 0x10450) && (code <= 0x1047F))); +} + +/** + * xmlUCSIsSinhala: + * @code: UCS code point + * + * Check whether the character is part of Sinhala UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsSinhala(int code) { + return(((code >= 0x0D80) && (code <= 0x0DFF))); +} + +/** + * xmlUCSIsSmallFormVariants: + * @code: UCS code point + * + * Check whether the character is part of SmallFormVariants UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsSmallFormVariants(int code) { + return(((code >= 0xFE50) && (code <= 0xFE6F))); +} + +/** + * xmlUCSIsSpacingModifierLetters: + * @code: UCS code point + * + * Check whether the character is part of SpacingModifierLetters UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsSpacingModifierLetters(int code) { + return(((code >= 0x02B0) && (code <= 0x02FF))); +} + +/** + * xmlUCSIsSpecials: + * @code: UCS code point + * + * Check whether the character is part of Specials UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsSpecials(int code) { + return(((code >= 0xFFF0) && (code <= 0xFFFF))); +} + +/** + * xmlUCSIsSuperscriptsandSubscripts: + * @code: UCS code point + * + * Check whether the character is part of SuperscriptsandSubscripts UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsSuperscriptsandSubscripts(int code) { + return(((code >= 0x2070) && (code <= 0x209F))); +} + +/** + * xmlUCSIsSupplementalArrowsA: + * @code: UCS code point + * + * Check whether the character is part of SupplementalArrows-A UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsSupplementalArrowsA(int code) { + return(((code >= 0x27F0) && (code <= 0x27FF))); +} + +/** + * xmlUCSIsSupplementalArrowsB: + * @code: UCS code point + * + * Check whether the character is part of SupplementalArrows-B UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsSupplementalArrowsB(int code) { + return(((code >= 0x2900) && (code <= 0x297F))); +} + +/** + * xmlUCSIsSupplementalMathematicalOperators: + * @code: UCS code point + * + * Check whether the character is part of SupplementalMathematicalOperators UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsSupplementalMathematicalOperators(int code) { + return(((code >= 0x2A00) && (code <= 0x2AFF))); +} + +/** + * xmlUCSIsSupplementaryPrivateUseAreaA: + * @code: UCS code point + * + * Check whether the character is part of SupplementaryPrivateUseArea-A UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsSupplementaryPrivateUseAreaA(int code) { + return(((code >= 0xF0000) && (code <= 0xFFFFF))); +} + +/** + * xmlUCSIsSupplementaryPrivateUseAreaB: + * @code: UCS code point + * + * Check whether the character is part of SupplementaryPrivateUseArea-B UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsSupplementaryPrivateUseAreaB(int code) { + return(((code >= 0x100000) && (code <= 0x10FFFF))); +} + +/** + * xmlUCSIsSyriac: + * @code: UCS code point + * + * Check whether the character is part of Syriac UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsSyriac(int code) { + return(((code >= 0x0700) && (code <= 0x074F))); +} + +/** + * xmlUCSIsTagalog: + * @code: UCS code point + * + * Check whether the character is part of Tagalog UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsTagalog(int code) { + return(((code >= 0x1700) && (code <= 0x171F))); +} + +/** + * xmlUCSIsTagbanwa: + * @code: UCS code point + * + * Check whether the character is part of Tagbanwa UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsTagbanwa(int code) { + return(((code >= 0x1760) && (code <= 0x177F))); +} + +/** + * xmlUCSIsTags: + * @code: UCS code point + * + * Check whether the character is part of Tags UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsTags(int code) { + return(((code >= 0xE0000) && (code <= 0xE007F))); +} + +/** + * xmlUCSIsTaiLe: + * @code: UCS code point + * + * Check whether the character is part of TaiLe UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsTaiLe(int code) { + return(((code >= 0x1950) && (code <= 0x197F))); +} + +/** + * xmlUCSIsTaiXuanJingSymbols: + * @code: UCS code point + * + * Check whether the character is part of TaiXuanJingSymbols UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsTaiXuanJingSymbols(int code) { + return(((code >= 0x1D300) && (code <= 0x1D35F))); +} + +/** + * xmlUCSIsTamil: + * @code: UCS code point + * + * Check whether the character is part of Tamil UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsTamil(int code) { + return(((code >= 0x0B80) && (code <= 0x0BFF))); +} + +/** + * xmlUCSIsTelugu: + * @code: UCS code point + * + * Check whether the character is part of Telugu UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsTelugu(int code) { + return(((code >= 0x0C00) && (code <= 0x0C7F))); +} + +/** + * xmlUCSIsThaana: + * @code: UCS code point + * + * Check whether the character is part of Thaana UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsThaana(int code) { + return(((code >= 0x0780) && (code <= 0x07BF))); +} + +/** + * xmlUCSIsThai: + * @code: UCS code point + * + * Check whether the character is part of Thai UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsThai(int code) { + return(((code >= 0x0E00) && (code <= 0x0E7F))); +} + +/** + * xmlUCSIsTibetan: + * @code: UCS code point + * + * Check whether the character is part of Tibetan UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsTibetan(int code) { + return(((code >= 0x0F00) && (code <= 0x0FFF))); +} + +/** + * xmlUCSIsUgaritic: + * @code: UCS code point + * + * Check whether the character is part of Ugaritic UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsUgaritic(int code) { + return(((code >= 0x10380) && (code <= 0x1039F))); +} + +/** + * xmlUCSIsUnifiedCanadianAboriginalSyllabics: + * @code: UCS code point + * + * Check whether the character is part of UnifiedCanadianAboriginalSyllabics UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsUnifiedCanadianAboriginalSyllabics(int code) { + return(((code >= 0x1400) && (code <= 0x167F))); +} + +/** + * xmlUCSIsVariationSelectors: + * @code: UCS code point + * + * Check whether the character is part of VariationSelectors UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsVariationSelectors(int code) { + return(((code >= 0xFE00) && (code <= 0xFE0F))); +} + +/** + * xmlUCSIsVariationSelectorsSupplement: + * @code: UCS code point + * + * Check whether the character is part of VariationSelectorsSupplement UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsVariationSelectorsSupplement(int code) { + return(((code >= 0xE0100) && (code <= 0xE01EF))); +} + +/** + * xmlUCSIsYiRadicals: + * @code: UCS code point + * + * Check whether the character is part of YiRadicals UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsYiRadicals(int code) { + return(((code >= 0xA490) && (code <= 0xA4CF))); +} + +/** + * xmlUCSIsYiSyllables: + * @code: UCS code point + * + * Check whether the character is part of YiSyllables UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsYiSyllables(int code) { + return(((code >= 0xA000) && (code <= 0xA48F))); +} + +/** + * xmlUCSIsYijingHexagramSymbols: + * @code: UCS code point + * + * Check whether the character is part of YijingHexagramSymbols UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsYijingHexagramSymbols(int code) { + return(((code >= 0x4DC0) && (code <= 0x4DFF))); +} + +/** + * xmlUCSIsBlock: + * @code: UCS code point + * @block: UCS block name + * + * Check whether the character is part of the UCS Block + * + * Returns 1 if true, 0 if false and -1 on unknown block + */ +int +xmlUCSIsBlock(int code, const char *block) { + xmlIntFunc *func; + + func = xmlUnicodeLookup(&xmlUnicodeBlockTbl, block); + if (func == NULL) + return (-1); + return (func(code)); +} + +/** + * xmlUCSIsCatC: + * @code: UCS code point + * + * Check whether the character is part of C UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatC(int code) { + return(xmlCharInRange((unsigned int)code, &xmlCG)); +} + +/** + * xmlUCSIsCatCc: + * @code: UCS code point + * + * Check whether the character is part of Cc UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatCc(int code) { + return(((code >= 0x0) && (code <= 0x1f)) || + ((code >= 0x7f) && (code <= 0x9f))); +} + +/** + * xmlUCSIsCatCf: + * @code: UCS code point + * + * Check whether the character is part of Cf UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatCf(int code) { + return(xmlCharInRange((unsigned int)code, &xmlCfG)); +} + +/** + * xmlUCSIsCatCo: + * @code: UCS code point + * + * Check whether the character is part of Co UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatCo(int code) { + return((code == 0xe000) || + (code == 0xf8ff) || + (code == 0xf0000) || + (code == 0xffffd) || + (code == 0x100000) || + (code == 0x10fffd)); +} + +/** + * xmlUCSIsCatCs: + * @code: UCS code point + * + * Check whether the character is part of Cs UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatCs(int code) { + return((code == 0xd800) || + ((code >= 0xdb7f) && (code <= 0xdb80)) || + ((code >= 0xdbff) && (code <= 0xdc00)) || + (code == 0xdfff)); +} + +/** + * xmlUCSIsCatL: + * @code: UCS code point + * + * Check whether the character is part of L UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatL(int code) { + return(xmlCharInRange((unsigned int)code, &xmlLG)); +} + +/** + * xmlUCSIsCatLl: + * @code: UCS code point + * + * Check whether the character is part of Ll UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatLl(int code) { + return(xmlCharInRange((unsigned int)code, &xmlLlG)); +} + +/** + * xmlUCSIsCatLm: + * @code: UCS code point + * + * Check whether the character is part of Lm UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatLm(int code) { + return(xmlCharInRange((unsigned int)code, &xmlLmG)); +} + +/** + * xmlUCSIsCatLo: + * @code: UCS code point + * + * Check whether the character is part of Lo UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatLo(int code) { + return(xmlCharInRange((unsigned int)code, &xmlLoG)); +} + +/** + * xmlUCSIsCatLt: + * @code: UCS code point + * + * Check whether the character is part of Lt UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatLt(int code) { + return(xmlCharInRange((unsigned int)code, &xmlLtG)); +} + +/** + * xmlUCSIsCatLu: + * @code: UCS code point + * + * Check whether the character is part of Lu UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatLu(int code) { + return(xmlCharInRange((unsigned int)code, &xmlLuG)); +} + +/** + * xmlUCSIsCatM: + * @code: UCS code point + * + * Check whether the character is part of M UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatM(int code) { + return(xmlCharInRange((unsigned int)code, &xmlMG)); +} + +/** + * xmlUCSIsCatMc: + * @code: UCS code point + * + * Check whether the character is part of Mc UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatMc(int code) { + return(xmlCharInRange((unsigned int)code, &xmlMcG)); +} + +/** + * xmlUCSIsCatMe: + * @code: UCS code point + * + * Check whether the character is part of Me UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatMe(int code) { + return(((code >= 0x488) && (code <= 0x489)) || + (code == 0x6de) || + ((code >= 0x20dd) && (code <= 0x20e0)) || + ((code >= 0x20e2) && (code <= 0x20e4))); +} + +/** + * xmlUCSIsCatMn: + * @code: UCS code point + * + * Check whether the character is part of Mn UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatMn(int code) { + return(xmlCharInRange((unsigned int)code, &xmlMnG)); +} + +/** + * xmlUCSIsCatN: + * @code: UCS code point + * + * Check whether the character is part of N UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatN(int code) { + return(xmlCharInRange((unsigned int)code, &xmlNG)); +} + +/** + * xmlUCSIsCatNd: + * @code: UCS code point + * + * Check whether the character is part of Nd UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatNd(int code) { + return(xmlCharInRange((unsigned int)code, &xmlNdG)); +} + +/** + * xmlUCSIsCatNl: + * @code: UCS code point + * + * Check whether the character is part of Nl UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatNl(int code) { + return(((code >= 0x16ee) && (code <= 0x16f0)) || + ((code >= 0x2160) && (code <= 0x2183)) || + (code == 0x3007) || + ((code >= 0x3021) && (code <= 0x3029)) || + ((code >= 0x3038) && (code <= 0x303a)) || + (code == 0x1034a)); +} + +/** + * xmlUCSIsCatNo: + * @code: UCS code point + * + * Check whether the character is part of No UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatNo(int code) { + return(xmlCharInRange((unsigned int)code, &xmlNoG)); +} + +/** + * xmlUCSIsCatP: + * @code: UCS code point + * + * Check whether the character is part of P UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatP(int code) { + return(xmlCharInRange((unsigned int)code, &xmlPG)); +} + +/** + * xmlUCSIsCatPc: + * @code: UCS code point + * + * Check whether the character is part of Pc UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatPc(int code) { + return((code == 0x5f) || + ((code >= 0x203f) && (code <= 0x2040)) || + (code == 0x2054) || + (code == 0x30fb) || + ((code >= 0xfe33) && (code <= 0xfe34)) || + ((code >= 0xfe4d) && (code <= 0xfe4f)) || + (code == 0xff3f) || + (code == 0xff65)); +} + +/** + * xmlUCSIsCatPd: + * @code: UCS code point + * + * Check whether the character is part of Pd UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatPd(int code) { + return(xmlCharInRange((unsigned int)code, &xmlPdG)); +} + +/** + * xmlUCSIsCatPe: + * @code: UCS code point + * + * Check whether the character is part of Pe UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatPe(int code) { + return(xmlCharInRange((unsigned int)code, &xmlPeG)); +} + +/** + * xmlUCSIsCatPf: + * @code: UCS code point + * + * Check whether the character is part of Pf UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatPf(int code) { + return((code == 0xbb) || + (code == 0x2019) || + (code == 0x201d) || + (code == 0x203a)); +} + +/** + * xmlUCSIsCatPi: + * @code: UCS code point + * + * Check whether the character is part of Pi UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatPi(int code) { + return((code == 0xab) || + (code == 0x2018) || + ((code >= 0x201b) && (code <= 0x201c)) || + (code == 0x201f) || + (code == 0x2039)); +} + +/** + * xmlUCSIsCatPo: + * @code: UCS code point + * + * Check whether the character is part of Po UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatPo(int code) { + return(xmlCharInRange((unsigned int)code, &xmlPoG)); +} + +/** + * xmlUCSIsCatPs: + * @code: UCS code point + * + * Check whether the character is part of Ps UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatPs(int code) { + return(xmlCharInRange((unsigned int)code, &xmlPsG)); +} + +/** + * xmlUCSIsCatS: + * @code: UCS code point + * + * Check whether the character is part of S UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatS(int code) { + return(xmlCharInRange((unsigned int)code, &xmlSG)); +} + +/** + * xmlUCSIsCatSc: + * @code: UCS code point + * + * Check whether the character is part of Sc UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatSc(int code) { + return(xmlCharInRange((unsigned int)code, &xmlScG)); +} + +/** + * xmlUCSIsCatSk: + * @code: UCS code point + * + * Check whether the character is part of Sk UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatSk(int code) { + return(xmlCharInRange((unsigned int)code, &xmlSkG)); +} + +/** + * xmlUCSIsCatSm: + * @code: UCS code point + * + * Check whether the character is part of Sm UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatSm(int code) { + return(xmlCharInRange((unsigned int)code, &xmlSmG)); +} + +/** + * xmlUCSIsCatSo: + * @code: UCS code point + * + * Check whether the character is part of So UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatSo(int code) { + return(xmlCharInRange((unsigned int)code, &xmlSoG)); +} + +/** + * xmlUCSIsCatZ: + * @code: UCS code point + * + * Check whether the character is part of Z UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatZ(int code) { + return(xmlCharInRange((unsigned int)code, &xmlZG)); +} + +/** + * xmlUCSIsCatZl: + * @code: UCS code point + * + * Check whether the character is part of Zl UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatZl(int code) { + return((code == 0x2028)); +} + +/** + * xmlUCSIsCatZp: + * @code: UCS code point + * + * Check whether the character is part of Zp UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatZp(int code) { + return((code == 0x2029)); +} + +/** + * xmlUCSIsCatZs: + * @code: UCS code point + * + * Check whether the character is part of Zs UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatZs(int code) { + return((code == 0x20) || + (code == 0xa0) || + (code == 0x1680) || + (code == 0x180e) || + ((code >= 0x2000) && (code <= 0x200a)) || + (code == 0x202f) || + (code == 0x205f) || + (code == 0x3000)); +} + +/** + * xmlUCSIsCat: + * @code: UCS code point + * @cat: UCS Category name + * + * Check whether the character is part of the UCS Category + * + * Returns 1 if true, 0 if false and -1 on unknown category + */ +int +xmlUCSIsCat(int code, const char *cat) { + xmlIntFunc *func; + + func = xmlUnicodeLookup(&xmlUnicodeCatTbl, cat); + if (func == NULL) + return (-1); + return (func(code)); +} + +#include diff --git a/libxsde/xsde/c/regexp/xmlunicode.h b/libxsde/xsde/c/regexp/xmlunicode.h new file mode 100644 index 0000000..e9bea46 --- /dev/null +++ b/libxsde/xsde/c/regexp/xmlunicode.h @@ -0,0 +1,195 @@ +/* + * Summary: Unicode character APIs + * Description: API for the Unicode character APIs + * + * This file is automatically generated from the + * UCS description files of the Unicode Character Database + * http://www.unicode.org/Public/4.0-Update1/UCD-4.0.1.html + * using the genUnicode.py Python script. + * + * Generation date: Mon Mar 27 11:09:52 2006 + * Sources: Blocks-4.0.1.txt UnicodeData-4.0.1.txt + */ + +#ifndef __XML_UNICODE_H__ +#define __XML_UNICODE_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +int xmlUCSIsAegeanNumbers (int code); +int xmlUCSIsAlphabeticPresentationForms (int code); +int xmlUCSIsArabic (int code); +int xmlUCSIsArabicPresentationFormsA (int code); +int xmlUCSIsArabicPresentationFormsB (int code); +int xmlUCSIsArmenian (int code); +int xmlUCSIsArrows (int code); +int xmlUCSIsBasicLatin (int code); +int xmlUCSIsBengali (int code); +int xmlUCSIsBlockElements (int code); +int xmlUCSIsBopomofo (int code); +int xmlUCSIsBopomofoExtended (int code); +int xmlUCSIsBoxDrawing (int code); +int xmlUCSIsBraillePatterns (int code); +int xmlUCSIsBuhid (int code); +int xmlUCSIsByzantineMusicalSymbols (int code); +int xmlUCSIsCJKCompatibility (int code); +int xmlUCSIsCJKCompatibilityForms (int code); +int xmlUCSIsCJKCompatibilityIdeographs (int code); +int xmlUCSIsCJKCompatibilityIdeographsSupplement (int code); +int xmlUCSIsCJKRadicalsSupplement (int code); +int xmlUCSIsCJKSymbolsandPunctuation (int code); +int xmlUCSIsCJKUnifiedIdeographs (int code); +int xmlUCSIsCJKUnifiedIdeographsExtensionA (int code); +int xmlUCSIsCJKUnifiedIdeographsExtensionB (int code); +int xmlUCSIsCherokee (int code); +int xmlUCSIsCombiningDiacriticalMarks (int code); +int xmlUCSIsCombiningDiacriticalMarksforSymbols (int code); +int xmlUCSIsCombiningHalfMarks (int code); +int xmlUCSIsCombiningMarksforSymbols (int code); +int xmlUCSIsControlPictures (int code); +int xmlUCSIsCurrencySymbols (int code); +int xmlUCSIsCypriotSyllabary (int code); +int xmlUCSIsCyrillic (int code); +int xmlUCSIsCyrillicSupplement (int code); +int xmlUCSIsDeseret (int code); +int xmlUCSIsDevanagari (int code); +int xmlUCSIsDingbats (int code); +int xmlUCSIsEnclosedAlphanumerics (int code); +int xmlUCSIsEnclosedCJKLettersandMonths (int code); +int xmlUCSIsEthiopic (int code); +int xmlUCSIsGeneralPunctuation (int code); +int xmlUCSIsGeometricShapes (int code); +int xmlUCSIsGeorgian (int code); +int xmlUCSIsGothic (int code); +int xmlUCSIsGreek (int code); +int xmlUCSIsGreekExtended (int code); +int xmlUCSIsGreekandCoptic (int code); +int xmlUCSIsGujarati (int code); +int xmlUCSIsGurmukhi (int code); +int xmlUCSIsHalfwidthandFullwidthForms (int code); +int xmlUCSIsHangulCompatibilityJamo (int code); +int xmlUCSIsHangulJamo (int code); +int xmlUCSIsHangulSyllables (int code); +int xmlUCSIsHanunoo (int code); +int xmlUCSIsHebrew (int code); +int xmlUCSIsHighPrivateUseSurrogates (int code); +int xmlUCSIsHighSurrogates (int code); +int xmlUCSIsHiragana (int code); +int xmlUCSIsIPAExtensions (int code); +int xmlUCSIsIdeographicDescriptionCharacters (int code); +int xmlUCSIsKanbun (int code); +int xmlUCSIsKangxiRadicals (int code); +int xmlUCSIsKannada (int code); +int xmlUCSIsKatakana (int code); +int xmlUCSIsKatakanaPhoneticExtensions (int code); +int xmlUCSIsKhmer (int code); +int xmlUCSIsKhmerSymbols (int code); +int xmlUCSIsLao (int code); +int xmlUCSIsLatin1Supplement (int code); +int xmlUCSIsLatinExtendedA (int code); +int xmlUCSIsLatinExtendedB (int code); +int xmlUCSIsLatinExtendedAdditional (int code); +int xmlUCSIsLetterlikeSymbols (int code); +int xmlUCSIsLimbu (int code); +int xmlUCSIsLinearBIdeograms (int code); +int xmlUCSIsLinearBSyllabary (int code); +int xmlUCSIsLowSurrogates (int code); +int xmlUCSIsMalayalam (int code); +int xmlUCSIsMathematicalAlphanumericSymbols (int code); +int xmlUCSIsMathematicalOperators (int code); +int xmlUCSIsMiscellaneousMathematicalSymbolsA (int code); +int xmlUCSIsMiscellaneousMathematicalSymbolsB (int code); +int xmlUCSIsMiscellaneousSymbols (int code); +int xmlUCSIsMiscellaneousSymbolsandArrows (int code); +int xmlUCSIsMiscellaneousTechnical (int code); +int xmlUCSIsMongolian (int code); +int xmlUCSIsMusicalSymbols (int code); +int xmlUCSIsMyanmar (int code); +int xmlUCSIsNumberForms (int code); +int xmlUCSIsOgham (int code); +int xmlUCSIsOldItalic (int code); +int xmlUCSIsOpticalCharacterRecognition (int code); +int xmlUCSIsOriya (int code); +int xmlUCSIsOsmanya (int code); +int xmlUCSIsPhoneticExtensions (int code); +int xmlUCSIsPrivateUse (int code); +int xmlUCSIsPrivateUseArea (int code); +int xmlUCSIsRunic (int code); +int xmlUCSIsShavian (int code); +int xmlUCSIsSinhala (int code); +int xmlUCSIsSmallFormVariants (int code); +int xmlUCSIsSpacingModifierLetters (int code); +int xmlUCSIsSpecials (int code); +int xmlUCSIsSuperscriptsandSubscripts (int code); +int xmlUCSIsSupplementalArrowsA (int code); +int xmlUCSIsSupplementalArrowsB (int code); +int xmlUCSIsSupplementalMathematicalOperators (int code); +int xmlUCSIsSupplementaryPrivateUseAreaA (int code); +int xmlUCSIsSupplementaryPrivateUseAreaB (int code); +int xmlUCSIsSyriac (int code); +int xmlUCSIsTagalog (int code); +int xmlUCSIsTagbanwa (int code); +int xmlUCSIsTags (int code); +int xmlUCSIsTaiLe (int code); +int xmlUCSIsTaiXuanJingSymbols (int code); +int xmlUCSIsTamil (int code); +int xmlUCSIsTelugu (int code); +int xmlUCSIsThaana (int code); +int xmlUCSIsThai (int code); +int xmlUCSIsTibetan (int code); +int xmlUCSIsUgaritic (int code); +int xmlUCSIsUnifiedCanadianAboriginalSyllabics (int code); +int xmlUCSIsVariationSelectors (int code); +int xmlUCSIsVariationSelectorsSupplement (int code); +int xmlUCSIsYiRadicals (int code); +int xmlUCSIsYiSyllables (int code); +int xmlUCSIsYijingHexagramSymbols (int code); + +int xmlUCSIsBlock (int code, const char *block); + +int xmlUCSIsCatC (int code); +int xmlUCSIsCatCc (int code); +int xmlUCSIsCatCf (int code); +int xmlUCSIsCatCo (int code); +int xmlUCSIsCatCs (int code); +int xmlUCSIsCatL (int code); +int xmlUCSIsCatLl (int code); +int xmlUCSIsCatLm (int code); +int xmlUCSIsCatLo (int code); +int xmlUCSIsCatLt (int code); +int xmlUCSIsCatLu (int code); +int xmlUCSIsCatM (int code); +int xmlUCSIsCatMc (int code); +int xmlUCSIsCatMe (int code); +int xmlUCSIsCatMn (int code); +int xmlUCSIsCatN (int code); +int xmlUCSIsCatNd (int code); +int xmlUCSIsCatNl (int code); +int xmlUCSIsCatNo (int code); +int xmlUCSIsCatP (int code); +int xmlUCSIsCatPc (int code); +int xmlUCSIsCatPd (int code); +int xmlUCSIsCatPe (int code); +int xmlUCSIsCatPf (int code); +int xmlUCSIsCatPi (int code); +int xmlUCSIsCatPo (int code); +int xmlUCSIsCatPs (int code); +int xmlUCSIsCatS (int code); +int xmlUCSIsCatSc (int code); +int xmlUCSIsCatSk (int code); +int xmlUCSIsCatSm (int code); +int xmlUCSIsCatSo (int code); +int xmlUCSIsCatZ (int code); +int xmlUCSIsCatZl (int code); +int xmlUCSIsCatZp (int code); +int xmlUCSIsCatZs (int code); + +int xmlUCSIsCat (int code, const char *cat); + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_UNICODE_H__ */ diff --git a/libxsde/xsde/cxx/parser/validating/string-common.cxx b/libxsde/xsde/cxx/parser/validating/string-common.cxx index 9558e30..1e3d8ba 100644 --- a/libxsde/xsde/cxx/parser/validating/string-common.cxx +++ b/libxsde/xsde/cxx/parser/validating/string-common.cxx @@ -3,6 +3,14 @@ // copyright : Copyright (c) 2005-2010 Code Synthesis Tools CC // license : GNU GPL v2 + exceptions; see accompanying LICENSE file +#include + +#ifdef XSDE_REGEXP +#ifdef XSDE_EXCEPTIONS +# include // std::bad_alloc +#endif +#endif + #include #include @@ -116,6 +124,41 @@ namespace xsde } } +#ifdef XSDE_REGEXP + if (f.pattern_set_ != 0) + { + if (f.pattern_set_ == 1) + { + xmlRegexpPtr r = xmlRegexpCompile ( + reinterpret_cast (f.pattern_.str)); + + if (r == 0) + { +#ifdef XSDE_EXCEPTIONS + throw std::bad_alloc (); +#else + ctx.sys_error (sys_error::no_memory); + return false; +#endif + + } + + string_facets::facets& t = + const_cast (f); + + t.pattern_.regexp = r; + t.pattern_set_ = 2; + } + + if (xmlRegexpExec ( + f.pattern_.regexp, + reinterpret_cast (s)) != 1) + { + ctx.schema_error (schema_error::value_pattern_mismatch); + return false; + } + } +#endif return true; } } diff --git a/libxsde/xsde/cxx/parser/validating/xml-schema-pskel.hxx b/libxsde/xsde/cxx/parser/validating/xml-schema-pskel.hxx index 1ef1005..8823846 100644 --- a/libxsde/xsde/cxx/parser/validating/xml-schema-pskel.hxx +++ b/libxsde/xsde/cxx/parser/validating/xml-schema-pskel.hxx @@ -12,6 +12,10 @@ # include #endif +#ifdef XSDE_REGEXP +# include +#endif + #include #include @@ -662,6 +666,9 @@ namespace xsde struct string_facets { string_facets (); +#ifdef XSDE_REGEXP + ~string_facets (); +#endif void _length_facet (size_t); @@ -678,6 +685,9 @@ namespace xsde void _enumeration_facet (const char* const*, size_t count); + void + _pattern_facet (const char*); + public: struct facets { @@ -688,10 +698,24 @@ namespace xsde const char* const* enum_; size_t enum_count_; +#ifdef XSDE_REGEXP + union + { + const char* str; + xmlRegexpPtr regexp; + } pattern_; +#endif unsigned int length_set_ : 1; unsigned int min_length_set_ : 1; unsigned int max_length_set_ : 1; +#ifdef XSDE_REGEXP + // 0 - not set + // 1 - string + // 2 - compiled + // + unsigned int pattern_set_: 2; +#endif // 0 - preserve // 1 - replace // 2 - collapse diff --git a/libxsde/xsde/cxx/parser/validating/xml-schema-pskel.ixx b/libxsde/xsde/cxx/parser/validating/xml-schema-pskel.ixx index 0a8f99a..a9d01f1 100644 --- a/libxsde/xsde/cxx/parser/validating/xml-schema-pskel.ixx +++ b/libxsde/xsde/cxx/parser/validating/xml-schema-pskel.ixx @@ -458,7 +458,20 @@ namespace xsde facets_.enum_ = 0; facets_.enum_count_ = 0; + +#ifdef XSDE_REGEXP + facets_.pattern_set_ = 0; +#endif + } + +#ifdef XSDE_REGEXP + inline string_facets:: + ~string_facets () + { + if (facets_.pattern_set_ == 2) + xmlRegFreeRegexp (facets_.pattern_.regexp); } +#endif inline void string_facets:: _length_facet (size_t v) @@ -494,6 +507,20 @@ namespace xsde facets_.enum_count_ = count; } +#ifndef XSDE_REGEXP + inline void string_facets:: + _pattern_facet (const char*) + { + } +#else + inline void string_facets:: + _pattern_facet (const char* s) + { + facets_.pattern_.str = s; + facets_.pattern_set_ = 1; + } +#endif + // string_pskel // #ifdef XSDE_REUSE_STYLE_TIEIN diff --git a/libxsde/xsde/cxx/schema-error.cxx b/libxsde/xsde/cxx/schema-error.cxx index a37ea28..aee2642 100644 --- a/libxsde/xsde/cxx/schema-error.cxx +++ b/libxsde/xsde/cxx/schema-error.cxx @@ -59,6 +59,7 @@ namespace xsde "value is greater than maximum allowed", "value is less than minimum allowed", "value is not in enumeration", + "value does not match pattern", "length is greater than maximum allowed", "length is less than minimum allowed", "length is not equal to prescribed length", diff --git a/libxsde/xsde/cxx/schema-error.hxx b/libxsde/xsde/cxx/schema-error.hxx index f442bf6..a20deac 100644 --- a/libxsde/xsde/cxx/schema-error.hxx +++ b/libxsde/xsde/cxx/schema-error.hxx @@ -62,6 +62,7 @@ namespace xsde value_greater_than_max, value_less_than_min, value_not_in_enumeration, + value_pattern_mismatch, length_greater_than_max, length_less_than_min, length_not_equal_prescribed, diff --git a/libxsde/xsde/cxx/serializer/validating/string-common.cxx b/libxsde/xsde/cxx/serializer/validating/string-common.cxx index 5dab6c9..53958a4 100644 --- a/libxsde/xsde/cxx/serializer/validating/string-common.cxx +++ b/libxsde/xsde/cxx/serializer/validating/string-common.cxx @@ -3,7 +3,16 @@ // copyright : Copyright (c) 2005-2010 Code Synthesis Tools CC // license : GNU GPL v2 + exceptions; see accompanying LICENSE file +#include + #include // strlen + +#ifdef XSDE_REGEXP +#ifdef XSDE_EXCEPTIONS +# include // std::bad_alloc +#endif +#endif + #include #include @@ -23,7 +32,11 @@ namespace xsde if (f.length_set_ || f.min_length_set_ || f.max_length_set_ || - f.enum_count_ != 0) + f.enum_count_ != 0 +#ifdef XSDE_REGEXP + || f.pattern_set_ != 0 +#endif + ) { return validate_facets (s, strlen (s), f, ctx); } @@ -64,6 +77,41 @@ namespace xsde } } +#ifdef XSDE_REGEXP + if (f.pattern_set_ != 0) + { + if (f.pattern_set_ == 1) + { + xmlRegexpPtr r = xmlRegexpCompile ( + reinterpret_cast (f.pattern_.str)); + + if (r == 0) + { +#ifdef XSDE_EXCEPTIONS + throw std::bad_alloc (); +#else + ctx.sys_error (sys_error::no_memory); + return false; +#endif + + } + + string_facets::facets& t = + const_cast (f); + + t.pattern_.regexp = r; + t.pattern_set_ = 2; + } + + if (xmlRegexpExec ( + f.pattern_.regexp, + reinterpret_cast (s)) != 1) + { + ctx.schema_error (schema_error::value_pattern_mismatch); + return false; + } + } +#endif return true; } } diff --git a/libxsde/xsde/cxx/serializer/validating/xml-schema-sskel.hxx b/libxsde/xsde/cxx/serializer/validating/xml-schema-sskel.hxx index a97742a..b93012d 100644 --- a/libxsde/xsde/cxx/serializer/validating/xml-schema-sskel.hxx +++ b/libxsde/xsde/cxx/serializer/validating/xml-schema-sskel.hxx @@ -12,6 +12,10 @@ # include #endif +#ifdef XSDE_REGEXP +# include +#endif + #include #include @@ -643,6 +647,9 @@ namespace xsde struct string_facets { string_facets (); +#ifdef XSDE_REGEXP + ~string_facets (); +#endif void _length_facet (size_t); @@ -656,6 +663,9 @@ namespace xsde void _enumeration_facet (const char* const*, size_t count); + void + _pattern_facet (const char*); + public: struct facets { @@ -666,9 +676,24 @@ namespace xsde const char* const* enum_; size_t enum_count_; +#ifdef XSDE_REGEXP + union + { + const char* str; + xmlRegexpPtr regexp; + } pattern_; +#endif unsigned int length_set_ : 1; unsigned int min_length_set_ : 1; unsigned int max_length_set_ : 1; + +#ifdef XSDE_REGEXP + // 0 - not set + // 1 - string + // 2 - compiled + // + unsigned int pattern_set_: 2; +#endif }; protected: diff --git a/libxsde/xsde/cxx/serializer/validating/xml-schema-sskel.ixx b/libxsde/xsde/cxx/serializer/validating/xml-schema-sskel.ixx index d62fdd4..35ce5d9 100644 --- a/libxsde/xsde/cxx/serializer/validating/xml-schema-sskel.ixx +++ b/libxsde/xsde/cxx/serializer/validating/xml-schema-sskel.ixx @@ -458,7 +458,20 @@ namespace xsde facets_.enum_ = 0; facets_.enum_count_ = 0; + +#ifdef XSDE_REGEXP + facets_.pattern_set_ = 0; +#endif + } + +#ifdef XSDE_REGEXP + inline string_facets:: + ~string_facets () + { + if (facets_.pattern_set_ == 2) + xmlRegFreeRegexp (facets_.pattern_.regexp); } +#endif inline void string_facets:: _length_facet (size_t v) @@ -488,6 +501,20 @@ namespace xsde facets_.enum_count_ = count; } +#ifndef XSDE_REGEXP + inline void string_facets:: + _pattern_facet (const char*) + { + } +#else + inline void string_facets:: + _pattern_facet (const char* s) + { + facets_.pattern_.str = s; + facets_.pattern_set_ = 1; + } +#endif + // string_sskel // #ifdef XSDE_REUSE_STYLE_TIEIN diff --git a/libxsde/xsde/makefile b/libxsde/xsde/makefile index 51edf31..9787855 100644 --- a/libxsde/xsde/makefile +++ b/libxsde/xsde/makefile @@ -8,6 +8,12 @@ include $(dir $(lastword $(MAKEFILE_LIST)))../../build/bootstrap.make c_tun := c/expat/xmlparse.c c/expat/xmlrole.c c/expat/xmltok.c c_tun += c/genx/genx.c c/genx/char-props.c +ifneq ($(xsde_parser_validation)$(xsde_serializer_validation),nn) +ifeq ($(xsde_regexp),y) +c_tun += c/regexp/chvalid.c c/regexp/xmlunicode.c c/regexp/xmlregexp.c +endif +endif + ifeq ($(xsde_custom_allocator),y) ifeq ($(xsde_default_allocator),y) c_tun += allocator.c @@ -40,6 +46,11 @@ endif ifneq ($(xsde_parser_validation)$(xsde_serializer_validation),nn) cxx_tun += cxx/schema-error.cxx + +ifeq ($(xsde_regexp),y) +cxx_tun += cxx/schema-error.cxx +endif + endif ifeq ($(xsde_polymorphic),y) @@ -590,6 +601,11 @@ ifeq ($(xsde_serializer_validation),y) else @echo '#undef XSDE_SERIALIZER_VALIDATION' >>$@ endif +ifeq ($(xsde_regexp),y) + @echo '#define XSDE_REGEXP' >>$@ +else + @echo '#undef XSDE_REGEXP' >>$@ +endif ifeq ($(xsde_reuse_style),mixin) @echo '#define XSDE_REUSE_STYLE_MIXIN' >>$@ else diff --git a/tests/cxx/hybrid/makefile b/tests/cxx/hybrid/makefile index cef5ffc..af610f5 100644 --- a/tests/cxx/hybrid/makefile +++ b/tests/cxx/hybrid/makefile @@ -11,7 +11,7 @@ include $(dir $(lastword $(MAKEFILE_LIST)))../../../build/bootstrap.make all_tests := sequences polymorphism iterator built-in default enumeration \ iso8859-1 list recursive test-template union binary/cdr binary/xdr choice \ -clone +clone pattern build_tests := sequences @@ -26,7 +26,7 @@ endif endif ifeq ($(xsde_iostream),y) -build_tests += built-in default enumeration list test-template union +build_tests += built-in default enumeration list test-template union pattern ifeq ($(xsde_encoding),iso8859-1) build_tests += iso8859-1 diff --git a/tests/cxx/hybrid/pattern/driver.cxx b/tests/cxx/hybrid/pattern/driver.cxx new file mode 100644 index 0000000..b9d8663 --- /dev/null +++ b/tests/cxx/hybrid/pattern/driver.cxx @@ -0,0 +1,56 @@ +// file : tests/cxx/hybrid/pattern/driver.cxx +// author : Boris Kolpackov +// copyright : Copyright (c) 2006-2010 Code Synthesis Tools CC +// license : GNU GPL v2 + exceptions; see accompanying LICENSE file + +// Test the pattern facet validation. +// + +#include + +#include "test.hxx" +#include "test-pimpl.hxx" +#include "test-simpl.hxx" + +using namespace std; +using namespace test; + +int +main (int argc, char* argv[]) +{ + if (argc != 2) + { + cerr << "usage: " << argv[0] << " test.xml" << endl; + return 1; + } + + // Parse. + // + root_paggr root_p; + + xml_schema::document_pimpl doc_p ( + root_p.root_parser (), + root_p.root_namespace (), + root_p.root_name ()); + + root_p.pre (); + doc_p.parse (argv[1]); + type* r = root_p.post (); + + // Serialize. + // + root_saggr root_s; + + xml_schema::document_simpl doc_s ( + root_s.root_serializer (), + root_s.root_namespace (), + root_s.root_name ()); + + doc_s.add_prefix ("t", "test"); + + root_s.pre (*r); + doc_s.serialize (cout, xml_schema::document_simpl::pretty_print); + root_s.post (); + + delete r; +} diff --git a/tests/cxx/hybrid/pattern/makefile b/tests/cxx/hybrid/pattern/makefile new file mode 100644 index 0000000..8313805 --- /dev/null +++ b/tests/cxx/hybrid/pattern/makefile @@ -0,0 +1,108 @@ +# file : tests/cxx/hybrid/pattern/makefile +# author : Boris Kolpackov +# copyright : Copyright (c) 2006-2010 Code Synthesis Tools CC +# license : GNU GPL v2 + exceptions; see accompanying LICENSE file + +include $(dir $(lastword $(MAKEFILE_LIST)))../../../../build/bootstrap.make + +xsd := test.xsd +cxx := driver.cxx + +obj := $(addprefix $(out_base)/,\ +$(cxx:.cxx=.o) \ +$(xsd:.xsd=.o) \ +$(xsd:.xsd=-pskel.o) \ +$(xsd:.xsd=-pimpl.o) \ +$(xsd:.xsd=-sskel.o) \ +$(xsd:.xsd=-simpl.o)) + +dep := $(obj:.o=.o.d) + +xsde.l := $(out_root)/libxsde/xsde/xsde.l +xsde.l.cpp-options := $(out_root)/libxsde/xsde/xsde.l.cpp-options + +driver := $(out_base)/driver +test := $(out_base)/.test +dist := $(out_base)/.dist +dist-win := $(out_base)/.dist-win +clean := $(out_base)/.clean + + +# Build. +# +$(driver): $(obj) $(xsde.l) + +$(obj) $(dep): $(xsde.l.cpp-options) + +genf := $(xsd:.xsd=.hxx) $(xsd:.xsd=.cxx) \ + $(xsd:.xsd=-pskel.hxx) $(xsd:.xsd=-pskel.cxx) \ + $(xsd:.xsd=-pimpl.hxx) $(xsd:.xsd=-pimpl.cxx) \ + $(xsd:.xsd=-sskel.hxx) $(xsd:.xsd=-sskel.cxx) \ + $(xsd:.xsd=-simpl.hxx) $(xsd:.xsd=-simpl.cxx) + +gen := $(addprefix $(out_base)/,$(genf)) + +$(gen): $(out_root)/xsde/xsde +$(gen): xsde := $(out_root)/xsde/xsde +$(gen) $(dist) $(dist-win): xsde_options += --generate-parser \ +--generate-serializer --generate-aggregate + +$(call include-dep,$(dep)) + +# Convenience alias for default target. +# +$(out_base)/: $(driver) + + +# Test. +# +$(test): driver := $(driver) +$(test): $(driver) $(src_base)/test-000.xml $(src_base)/test-000.std + $(call message,test $$1,$$1 $(src_base)/test-000.xml | diff -u $(src_base)/test-000.std -,$(driver)) + + +# Dist. +# +$(dist) $(dist-win): opt := -src $(src_base) -cmd cxx-hybrid -xsd "$(xsd)" \ +-cxx "$(cxx)" -gen "$(genf)" -opt "$(xsde_options)" -out $(dist_prefix) + +$(dist): + $(call message,install $(src_base),$(scf_root)/dist $(opt)) + +$(dist-win): + $(call message,install $(src_base),$(scf_root)/dist -win $(opt)) + + +# Clean. +# +$(clean): $(driver).o.clean \ + $(addsuffix .cxx.clean,$(obj)) \ + $(addsuffix .cxx.clean,$(dep)) \ + $(addprefix $(out_base)/,$(xsd:.xsd=.cxx.xsd.clean)) + + +# Generated .gitignore. +# +ifeq ($(out_base),$(src_base)) +$(gen): | $(out_base)/.gitignore +$(driver): | $(out_base)/.gitignore + +$(out_base)/.gitignore: files := driver $(genf) +$(clean): $(out_base)/.gitignore.clean + +$(call include,$(bld_root)/git/gitignore.make) +endif + + +# How to. +# +$(call include,$(bld_root)/cxx/o-e.make) +$(call include,$(bld_root)/cxx/cxx-o.make) +$(call include,$(bld_root)/cxx/cxx-d.make) +$(call include,$(scf_root)/xsde/hybrid/xsd-cxx.make) + + +# Dependencies. +# +$(call import,$(src_root)/xsde/makefile) +$(call import,$(src_root)/libxsde/xsde/makefile) diff --git a/tests/cxx/hybrid/pattern/test-000.std b/tests/cxx/hybrid/pattern/test-000.std new file mode 100644 index 0000000..7b0d83a --- /dev/null +++ b/tests/cxx/hybrid/pattern/test-000.std @@ -0,0 +1,11 @@ + + foobarbaz + 12,32,123,321, + 12321 + 12321 + + + + + abcdef + \ No newline at end of file diff --git a/tests/cxx/hybrid/pattern/test-000.xml b/tests/cxx/hybrid/pattern/test-000.xml new file mode 100644 index 0000000..d503cb1 --- /dev/null +++ b/tests/cxx/hybrid/pattern/test-000.xml @@ -0,0 +1,13 @@ + + + foobarbaz + 12,32,123,321, + 12321 + 12321 + + + + + abcdef + + diff --git a/tests/cxx/hybrid/pattern/test.xsd b/tests/cxx/hybrid/pattern/test.xsd new file mode 100644 index 0000000..e008709 --- /dev/null +++ b/tests/cxx/hybrid/pattern/test.xsd @@ -0,0 +1,53 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/xsde/cxx/parser/elements.cxx b/xsde/cxx/parser/elements.cxx index 2c138ce..25e2032 100644 --- a/xsde/cxx/parser/elements.cxx +++ b/xsde/cxx/parser/elements.cxx @@ -207,24 +207,30 @@ namespace CXX SemanticGraph::Type& ub (ultimate_base (c)); Restricts::FacetIterator end (r.facet_end ()); - if ((ub.is_a () || - ub.is_a ()) && - r.facet_find (L"whiteSpace") != end) - return true; - - if (validation) + if (ub.is_a () || + ub.is_a ()) { - if (ub.is_a () || - ub.is_a () || - ub.is_a () || - ub.is_a () || - ub.is_a () || - ub.is_a ()) + if (r.facet_find (L"whiteSpace") != end) + return true; + + if (validation) { if (r.facet_find (L"length") != end || r.facet_find (L"minLength") != end || r.facet_find (L"maxLength") != end || - r.facet_find (L"minInclusive") != end || + r.facet_find (L"pattern") != end) + return true; + } + } + + if (ub.is_a () || + ub.is_a () || + ub.is_a () || + ub.is_a ()) + { + if (validation) + { + if (r.facet_find (L"minInclusive") != end || r.facet_find (L"minExclusive") != end || r.facet_find (L"maxInclusive") != end || r.facet_find (L"maxExclusive") != end) diff --git a/xsde/cxx/parser/parser-inline.cxx b/xsde/cxx/parser/parser-inline.cxx index 984eca6..d20a3e7 100644 --- a/xsde/cxx/parser/parser-inline.cxx +++ b/xsde/cxx/parser/parser-inline.cxx @@ -17,54 +17,83 @@ namespace CXX Void facet_calls (SemanticGraph::Complex& c, Context& ctx) { + using SemanticGraph::Restricts; + std::wostream& os (ctx.os); - using SemanticGraph::Restricts; + SemanticGraph::Type& ub (ctx.ultimate_base (c)); Restricts& r (dynamic_cast (c.inherits ())); - for (Restricts::FacetIterator i (r.facet_begin ()); - i != r.facet_end (); ++i) + if (ub.is_a () || + ub.is_a ()) { - if (i->first == L"length") - { - os << "this->_length_facet (" << i->second << "UL);"; - } - else if (i->first == L"minLength") - { - os << "this->_min_length_facet (" << i->second << "UL);"; - } - else if (i->first == L"maxLength") - { - os << "this->_max_length_facet (" << i->second << "UL);"; - } - else if (i->first == L"minInclusive") - { - os << "this->_min_facet (" << i->second << ", true);"; - } - else if (i->first == L"minExclusive") + for (Restricts::FacetIterator i (r.facet_begin ()); + i != r.facet_end (); ++i) { - os << "this->_min_facet (" << i->second << ", false);"; - } - else if (i->first == L"maxInclusive") - { - os << "this->_max_facet (" << i->second << ", true);"; - } - else if (i->first == L"maxExclusive") - { - os << "this->_max_facet (" << i->second << ", false);"; + if (i->first == L"whiteSpace") + { + os << "this->_whitespace_facet ("; + + if (i->second == L"preserve") + os << "0"; + else if (i->second == L"replace") + os << "1"; + else if (i->second == L"collapse") + os << "2"; + + os << ");"; + continue; + } + + if (!ctx.validation) + continue; + + if (i->first == L"length") + { + os << "this->_length_facet (" << i->second << "UL);"; + } + else if (i->first == L"minLength") + { + os << "this->_min_length_facet (" << i->second << "UL);"; + } + else if (i->first == L"maxLength") + { + os << "this->_max_length_facet (" << i->second << "UL);"; + } + else if (i->first == L"pattern") + { + os << "this->_pattern_facet (" << ctx.strlit (i->second) << ");"; + } } - else if (i->first == L"whiteSpace") - { - os << "this->_whitespace_facet ("; + } - if (i->second == L"preserve") - os << "0"; - else if (i->second == L"replace") - os << "1"; - else if (i->second == L"collapse") - os << "2"; + if (ub.is_a () || + ub.is_a () || + ub.is_a () || + ub.is_a ()) + { + for (Restricts::FacetIterator i (r.facet_begin ()); + i != r.facet_end (); ++i) + { + if (!ctx.validation) + continue; - os << ");"; + if (i->first == L"minInclusive") + { + os << "this->_min_facet (" << i->second << ", true);"; + } + else if (i->first == L"minExclusive") + { + os << "this->_min_facet (" << i->second << ", false);"; + } + else if (i->first == L"maxInclusive") + { + os << "this->_max_facet (" << i->second << ", true);"; + } + else if (i->first == L"maxExclusive") + { + os << "this->_max_facet (" << i->second << ", false);"; + } } } } diff --git a/xsde/cxx/serializer/elements.cxx b/xsde/cxx/serializer/elements.cxx index 62fd32d..e598f8f 100644 --- a/xsde/cxx/serializer/elements.cxx +++ b/xsde/cxx/serializer/elements.cxx @@ -225,24 +225,34 @@ namespace CXX return false; SemanticGraph::Type& ub (ultimate_base (c)); + Restricts::FacetIterator end (r.facet_end ()); + + if (ub.is_a () || + ub.is_a ()) + { + if (validation) + { + if (r.facet_find (L"length") != end || + r.facet_find (L"minLength") != end || + r.facet_find (L"maxLength") != end || + r.facet_find (L"pattern") != end) + return true; + } + } if (ub.is_a () || ub.is_a () || ub.is_a () || - ub.is_a () || - ub.is_a () || - ub.is_a ()) + ub.is_a ()) { - Restricts::FacetIterator end (r.facet_end ()); - - if (r.facet_find (L"length") != end || - r.facet_find (L"minLength") != end || - r.facet_find (L"maxLength") != end || - r.facet_find (L"minInclusive") != end || - r.facet_find (L"minExclusive") != end || - r.facet_find (L"maxInclusive") != end || - r.facet_find (L"maxExclusive") != end) - return true; + if (validation) + { + if (r.facet_find (L"minInclusive") != end || + r.facet_find (L"minExclusive") != end || + r.facet_find (L"maxInclusive") != end || + r.facet_find (L"maxExclusive") != end) + return true; + } } } diff --git a/xsde/cxx/serializer/serializer-inline.cxx b/xsde/cxx/serializer/serializer-inline.cxx index f888e4f..4bebc9c 100644 --- a/xsde/cxx/serializer/serializer-inline.cxx +++ b/xsde/cxx/serializer/serializer-inline.cxx @@ -17,41 +17,68 @@ namespace CXX Void facet_calls (SemanticGraph::Complex& c, Context& ctx) { + using SemanticGraph::Restricts; + std::wostream& os (ctx.os); - using SemanticGraph::Restricts; + SemanticGraph::Type& ub (ctx.ultimate_base (c)); Restricts& r (dynamic_cast (c.inherits ())); - for (Restricts::FacetIterator i (r.facet_begin ()); - i != r.facet_end (); ++i) + if (ub.is_a () || + ub.is_a ()) { - if (i->first == L"length") - { - os << "this->_length_facet (" << i->second << "UL);"; - } - else if (i->first == L"minLength") - { - os << "this->_min_length_facet (" << i->second << "UL);"; - } - else if (i->first == L"maxLength") - { - os << "this->_max_length_facet (" << i->second << "UL);"; - } - else if (i->first == L"minInclusive") + for (Restricts::FacetIterator i (r.facet_begin ()); + i != r.facet_end (); ++i) { - os << "this->_min_facet (" << i->second << ", true);"; - } - else if (i->first == L"minExclusive") - { - os << "this->_min_facet (" << i->second << ", false);"; - } - else if (i->first == L"maxInclusive") - { - os << "this->_max_facet (" << i->second << ", true);"; + if (!ctx.validation) + continue; + + if (i->first == L"length") + { + os << "this->_length_facet (" << i->second << "UL);"; + } + else if (i->first == L"minLength") + { + os << "this->_min_length_facet (" << i->second << "UL);"; + } + else if (i->first == L"maxLength") + { + os << "this->_max_length_facet (" << i->second << "UL);"; + } + else if (i->first == L"pattern") + { + os << "this->_pattern_facet (" << ctx.strlit (i->second) << ");"; + } } - else if (i->first == L"maxExclusive") + } + + if (ub.is_a () || + ub.is_a () || + ub.is_a () || + ub.is_a ()) + { + for (Restricts::FacetIterator i (r.facet_begin ()); + i != r.facet_end (); ++i) { - os << "this->_max_facet (" << i->second << ", false);"; + if (!ctx.validation) + continue; + + if (i->first == L"minInclusive") + { + os << "this->_min_facet (" << i->second << ", true);"; + } + else if (i->first == L"minExclusive") + { + os << "this->_min_facet (" << i->second << ", false);"; + } + else if (i->first == L"maxInclusive") + { + os << "this->_max_facet (" << i->second << ", true);"; + } + else if (i->first == L"maxExclusive") + { + os << "this->_max_facet (" << i->second << ", false);"; + } } } } -- cgit v1.1