aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--NEWS12
-rw-r--r--build/configuration.make2
-rwxr-xr-xbuild/configure7
-rw-r--r--dist/build/config.make1
-rw-r--r--dist/config/config.make9
-rw-r--r--dist/config/config.nmake9
-rw-r--r--dist/etc/evc-4.0/config.nmake9
-rw-r--r--dist/etc/integrity/config.make9
-rw-r--r--dist/etc/iphone/config-device.make9
-rw-r--r--dist/etc/iphone/config-simulator.make9
-rw-r--r--dist/etc/lynxos/config-4.2.make9
-rw-r--r--dist/etc/lynxos/config-5.0.make9
-rw-r--r--dist/etc/qnx/config-6.3-gcc-2.95.make9
-rw-r--r--dist/etc/qnx/config-6.3-gcc-3.3.make9
-rw-r--r--dist/etc/qnx/config-6.4.make9
-rw-r--r--dist/etc/vc-8.0/config-max.nmake9
-rw-r--r--dist/etc/vc-8.0/config-min.nmake9
-rw-r--r--dist/etc/vc-9.0/config-max.nmake9
-rw-r--r--dist/etc/vc-9.0/config-min.nmake9
-rw-r--r--dist/etc/vxworks/config-5.5.1.make9
-rw-r--r--dist/etc/vxworks/config-6.4-max.make9
-rw-r--r--dist/etc/vxworks/config-6.4-min.make9
-rw-r--r--dist/etc/vxworks/config-6.7-max.make9
-rw-r--r--dist/etc/vxworks/config-6.7-min.make9
-rw-r--r--dist/libxsde/xsde/makefile9
-rw-r--r--dist/libxsde/xsde/nmakefile11
-rw-r--r--dist/tests/cxx/hybrid/makefile2
-rw-r--r--dist/tests/cxx/hybrid/nmakefile2
-rw-r--r--libxsde/xsde/c/regexp/COPYING40
-rw-r--r--libxsde/xsde/c/regexp/chvalid.c388
-rw-r--r--libxsde/xsde/c/regexp/chvalid.h63
-rw-r--r--libxsde/xsde/c/regexp/xmlregexp.c4375
-rw-r--r--libxsde/xsde/c/regexp/xmlregexp.h39
-rw-r--r--libxsde/xsde/c/regexp/xmlunicode.c3172
-rw-r--r--libxsde/xsde/c/regexp/xmlunicode.h195
-rw-r--r--libxsde/xsde/cxx/parser/validating/string-common.cxx43
-rw-r--r--libxsde/xsde/cxx/parser/validating/xml-schema-pskel.hxx24
-rw-r--r--libxsde/xsde/cxx/parser/validating/xml-schema-pskel.ixx27
-rw-r--r--libxsde/xsde/cxx/schema-error.cxx1
-rw-r--r--libxsde/xsde/cxx/schema-error.hxx1
-rw-r--r--libxsde/xsde/cxx/serializer/validating/string-common.cxx50
-rw-r--r--libxsde/xsde/cxx/serializer/validating/xml-schema-sskel.hxx25
-rw-r--r--libxsde/xsde/cxx/serializer/validating/xml-schema-sskel.ixx27
-rw-r--r--libxsde/xsde/makefile16
-rw-r--r--tests/cxx/hybrid/makefile4
-rw-r--r--tests/cxx/hybrid/pattern/driver.cxx56
-rw-r--r--tests/cxx/hybrid/pattern/makefile108
-rw-r--r--tests/cxx/hybrid/pattern/test-000.std11
-rw-r--r--tests/cxx/hybrid/pattern/test-000.xml13
-rw-r--r--tests/cxx/hybrid/pattern/test.xsd53
-rw-r--r--xsde/cxx/parser/elements.cxx32
-rw-r--r--xsde/cxx/parser/parser-inline.cxx107
-rw-r--r--xsde/cxx/serializer/elements.cxx36
-rw-r--r--xsde/cxx/serializer/serializer-inline.cxx81
54 files changed, 9115 insertions, 98 deletions
diff --git a/NEWS b/NEWS
index 08c82ca..44f06c9 100644
--- a/NEWS
+++ b/NEWS
@@ -66,6 +66,18 @@ Version 3.2.0
automatically converted to equivalent enumeration types with a union
of all the member's enumerators.
+ C++/Parser
+
+ * Support for validation of the pattern XML Schema facet on string-based
+ types. See the XSDE_REGEXP parameter in the configuration files for
+ details.
+
+ C++/Serializer
+
+ * Support for validation of the pattern XML Schema facet on string-based
+ types. See the XSDE_REGEXP parameter in the configuration files for
+ details.
+
Version 3.1.0
C++/Hybrid
diff --git a/build/configuration.make b/build/configuration.make
index 0de6d47..cea0216 100644
--- a/build/configuration.make
+++ b/build/configuration.make
@@ -19,6 +19,7 @@ xsde_longlong :=
xsde_snprintf :=
xsde_parser_validation :=
xsde_serializer_validation :=
+xsde_regexp :=
xsde_reuse_style :=
xsde_custom_allocator :=
xsde_default_allocator :=
@@ -47,6 +48,7 @@ $(out_root)/%: xsde_longlong := $(xsde_longlong)
$(out_root)/%: xsde_snprintf := $(xsde_snprintf)
$(out_root)/%: xsde_parser_validation := $(xsde_parser_validation)
$(out_root)/%: xsde_serializer_validation := $(xsde_serializer_validation)
+$(out_root)/%: xsde_regexp := $(xsde_regexp)
$(out_root)/%: xsde_reuse_style := $(xsde_reuse_style)
$(out_root)/%: xsde_custom_allocator := $(xsde_custom_allocator)
$(out_root)/%: xsde_default_allocator := $(xsde_default_allocator)
diff --git a/build/configure b/build/configure
index 7e37c13..7f22df9 100755
--- a/build/configure
+++ b/build/configure
@@ -97,6 +97,12 @@ $echo
serializer_validation=`read_y_n y`
$echo
+$echo "Would you like to include regexp support for xs:pattern validation?"
+$echo
+
+regexp=`read_y_n y`
+
+$echo
$echo "Please select the base parser/serializer reuse style you would"
$echo "like to use:"
$echo
@@ -197,6 +203,7 @@ echo "xsde_longlong := $longlong" >>
echo "xsde_snprintf := $snprintf" >>$1
echo "xsde_parser_validation := $parser_validation" >>$1
echo "xsde_serializer_validation := $serializer_validation" >>$1
+echo "xsde_regexp := $regexp" >>$1
echo "xsde_reuse_style := $reuse_style" >>$1
echo "xsde_custom_allocator := $allocator" >>$1
echo "xsde_default_allocator := $allocator_default" >>$1
diff --git a/dist/build/config.make b/dist/build/config.make
index 33b6a8e..e7e9aaa 100644
--- a/dist/build/config.make
+++ b/dist/build/config.make
@@ -23,6 +23,7 @@ XSDE_LONGLONG := $(strip $(XSDE_LONGLONG))
XSDE_SNPRINTF := $(strip $(XSDE_SNPRINTF))
XSDE_PARSER_VALIDATION := $(strip $(XSDE_PARSER_VALIDATION))
XSDE_SERIALIZER_VALIDATION := $(strip $(XSDE_SERIALIZER_VALIDATION))
+XSDE_REGEXP := $(strip $(XSDE_REGEXP))
XSDE_REUSE_STYLE := $(strip $(XSDE_REUSE_STYLE))
XSDE_CUSTOM_ALLOCATOR := $(strip $(XSDE_CUSTOM_ALLOCATOR))
XSDE_DEFAULT_ALLOCATOR := $(strip $(XSDE_DEFAULT_ALLOCATOR))
diff --git a/dist/config/config.make b/dist/config/config.make
index 95dc46f..426384e 100644
--- a/dist/config/config.make
+++ b/dist/config/config.make
@@ -108,6 +108,15 @@ XSDE_PARSER_VALIDATION := y
XSDE_SERIALIZER_VALIDATION := y
+# Set to 'y' if you would like to have support for regular expressions in
+# the XSD/e runtime. If the regexp support is enabled, then the parser and
+# serializer validation code will use it to validate the xs:pattern facet.
+# If the regexp support is disabled, then this facet will be ignored. The
+# regexp support increases the resulting executable size by about 30-50Kb.
+#
+XSDE_REGEXP := n
+
+
# Base parser/serializer implementation reuse style. Valid values are:
#
# 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin)
diff --git a/dist/config/config.nmake b/dist/config/config.nmake
index 3e23c25..24a267f 100644
--- a/dist/config/config.nmake
+++ b/dist/config/config.nmake
@@ -107,6 +107,15 @@ XSDE_PARSER_VALIDATION = y
XSDE_SERIALIZER_VALIDATION = y
+# Set to 'y' if you would like to have support for regular expressions in
+# the XSD/e runtime. If the regexp support is enabled, then the parser and
+# serializer validation code will use it to validate the xs:pattern facet.
+# If the regexp support is disabled, then this facet will be ignored. The
+# regexp support increases the resulting executable size by about 30-50Kb.
+#
+XSDE_REGEXP = n
+
+
# Base parser/serializer implementation reuse style. Valid values are:
#
# 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin)
diff --git a/dist/etc/evc-4.0/config.nmake b/dist/etc/evc-4.0/config.nmake
index 825c909..e5b9a56 100644
--- a/dist/etc/evc-4.0/config.nmake
+++ b/dist/etc/evc-4.0/config.nmake
@@ -110,6 +110,15 @@ XSDE_PARSER_VALIDATION = y
XSDE_SERIALIZER_VALIDATION = y
+# Set to 'y' if you would like to have support for regular expressions in
+# the XSD/e runtime. If the regexp support is enabled, then the parser and
+# serializer validation code will use it to validate the xs:pattern facet.
+# If the regexp support is disabled, then this facet will be ignored. The
+# regexp support increases the resulting executable size by about 30-50Kb.
+#
+XSDE_REGEXP = n
+
+
# Base parser/serializer implementation reuse style. Valid values are:
#
# 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin)
diff --git a/dist/etc/integrity/config.make b/dist/etc/integrity/config.make
index 943b208..5755056 100644
--- a/dist/etc/integrity/config.make
+++ b/dist/etc/integrity/config.make
@@ -113,6 +113,15 @@ XSDE_PARSER_VALIDATION := y
XSDE_SERIALIZER_VALIDATION := y
+# Set to 'y' if you would like to have support for regular expressions in
+# the XSD/e runtime. If the regexp support is enabled, then the parser and
+# serializer validation code will use it to validate the xs:pattern facet.
+# If the regexp support is disabled, then this facet will be ignored. The
+# regexp support increases the resulting executable size by about 30-50Kb.
+#
+XSDE_REGEXP := n
+
+
# Base parser/serializer implementation reuse style. Valid values are:
#
# 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin)
diff --git a/dist/etc/iphone/config-device.make b/dist/etc/iphone/config-device.make
index 8d11943..9b98c36 100644
--- a/dist/etc/iphone/config-device.make
+++ b/dist/etc/iphone/config-device.make
@@ -115,6 +115,15 @@ XSDE_PARSER_VALIDATION := y
XSDE_SERIALIZER_VALIDATION := y
+# Set to 'y' if you would like to have support for regular expressions in
+# the XSD/e runtime. If the regexp support is enabled, then the parser and
+# serializer validation code will use it to validate the xs:pattern facet.
+# If the regexp support is disabled, then this facet will be ignored. The
+# regexp support increases the resulting executable size by about 30-50Kb.
+#
+XSDE_REGEXP := n
+
+
# Base parser/serializer implementation reuse style. Valid values are:
#
# 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin)
diff --git a/dist/etc/iphone/config-simulator.make b/dist/etc/iphone/config-simulator.make
index 48f0652..2fa5632 100644
--- a/dist/etc/iphone/config-simulator.make
+++ b/dist/etc/iphone/config-simulator.make
@@ -115,6 +115,15 @@ XSDE_PARSER_VALIDATION := y
XSDE_SERIALIZER_VALIDATION := y
+# Set to 'y' if you would like to have support for regular expressions in
+# the XSD/e runtime. If the regexp support is enabled, then the parser and
+# serializer validation code will use it to validate the xs:pattern facet.
+# If the regexp support is disabled, then this facet will be ignored. The
+# regexp support increases the resulting executable size by about 30-50Kb.
+#
+XSDE_REGEXP := n
+
+
# Base parser/serializer implementation reuse style. Valid values are:
#
# 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin)
diff --git a/dist/etc/lynxos/config-4.2.make b/dist/etc/lynxos/config-4.2.make
index d1b1b68..1215121 100644
--- a/dist/etc/lynxos/config-4.2.make
+++ b/dist/etc/lynxos/config-4.2.make
@@ -114,6 +114,15 @@ XSDE_PARSER_VALIDATION := y
XSDE_SERIALIZER_VALIDATION := y
+# Set to 'y' if you would like to have support for regular expressions in
+# the XSD/e runtime. If the regexp support is enabled, then the parser and
+# serializer validation code will use it to validate the xs:pattern facet.
+# If the regexp support is disabled, then this facet will be ignored. The
+# regexp support increases the resulting executable size by about 30-50Kb.
+#
+XSDE_REGEXP := n
+
+
# Base parser/serializer implementation reuse style. Valid values are:
#
# 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin)
diff --git a/dist/etc/lynxos/config-5.0.make b/dist/etc/lynxos/config-5.0.make
index b61ade3..d0a22bb 100644
--- a/dist/etc/lynxos/config-5.0.make
+++ b/dist/etc/lynxos/config-5.0.make
@@ -114,6 +114,15 @@ XSDE_PARSER_VALIDATION := y
XSDE_SERIALIZER_VALIDATION := y
+# Set to 'y' if you would like to have support for regular expressions in
+# the XSD/e runtime. If the regexp support is enabled, then the parser and
+# serializer validation code will use it to validate the xs:pattern facet.
+# If the regexp support is disabled, then this facet will be ignored. The
+# regexp support increases the resulting executable size by about 30-50Kb.
+#
+XSDE_REGEXP := n
+
+
# Base parser/serializer implementation reuse style. Valid values are:
#
# 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin)
diff --git a/dist/etc/qnx/config-6.3-gcc-2.95.make b/dist/etc/qnx/config-6.3-gcc-2.95.make
index 1228db2..6a4a7e3 100644
--- a/dist/etc/qnx/config-6.3-gcc-2.95.make
+++ b/dist/etc/qnx/config-6.3-gcc-2.95.make
@@ -113,6 +113,15 @@ XSDE_PARSER_VALIDATION := y
XSDE_SERIALIZER_VALIDATION := y
+# Set to 'y' if you would like to have support for regular expressions in
+# the XSD/e runtime. If the regexp support is enabled, then the parser and
+# serializer validation code will use it to validate the xs:pattern facet.
+# If the regexp support is disabled, then this facet will be ignored. The
+# regexp support increases the resulting executable size by about 30-50Kb.
+#
+XSDE_REGEXP := n
+
+
# Base parser/serializer implementation reuse style. Valid values are:
#
# 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin)
diff --git a/dist/etc/qnx/config-6.3-gcc-3.3.make b/dist/etc/qnx/config-6.3-gcc-3.3.make
index fe39d31..915fe6c 100644
--- a/dist/etc/qnx/config-6.3-gcc-3.3.make
+++ b/dist/etc/qnx/config-6.3-gcc-3.3.make
@@ -113,6 +113,15 @@ XSDE_PARSER_VALIDATION := y
XSDE_SERIALIZER_VALIDATION := y
+# Set to 'y' if you would like to have support for regular expressions in
+# the XSD/e runtime. If the regexp support is enabled, then the parser and
+# serializer validation code will use it to validate the xs:pattern facet.
+# If the regexp support is disabled, then this facet will be ignored. The
+# regexp support increases the resulting executable size by about 30-50Kb.
+#
+XSDE_REGEXP := n
+
+
# Base parser/serializer implementation reuse style. Valid values are:
#
# 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin)
diff --git a/dist/etc/qnx/config-6.4.make b/dist/etc/qnx/config-6.4.make
index 33d3658..c8603cb 100644
--- a/dist/etc/qnx/config-6.4.make
+++ b/dist/etc/qnx/config-6.4.make
@@ -113,6 +113,15 @@ XSDE_PARSER_VALIDATION := y
XSDE_SERIALIZER_VALIDATION := y
+# Set to 'y' if you would like to have support for regular expressions in
+# the XSD/e runtime. If the regexp support is enabled, then the parser and
+# serializer validation code will use it to validate the xs:pattern facet.
+# If the regexp support is disabled, then this facet will be ignored. The
+# regexp support increases the resulting executable size by about 30-50Kb.
+#
+XSDE_REGEXP := n
+
+
# Base parser/serializer implementation reuse style. Valid values are:
#
# 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin)
diff --git a/dist/etc/vc-8.0/config-max.nmake b/dist/etc/vc-8.0/config-max.nmake
index 07a7c0a..ce2b95e 100644
--- a/dist/etc/vc-8.0/config-max.nmake
+++ b/dist/etc/vc-8.0/config-max.nmake
@@ -111,6 +111,15 @@ XSDE_PARSER_VALIDATION = y
XSDE_SERIALIZER_VALIDATION = y
+# Set to 'y' if you would like to have support for regular expressions in
+# the XSD/e runtime. If the regexp support is enabled, then the parser and
+# serializer validation code will use it to validate the xs:pattern facet.
+# If the regexp support is disabled, then this facet will be ignored. The
+# regexp support increases the resulting executable size by about 30-50Kb.
+#
+XSDE_REGEXP = n
+
+
# Base parser/serializer implementation reuse style. Valid values are:
#
# 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin)
diff --git a/dist/etc/vc-8.0/config-min.nmake b/dist/etc/vc-8.0/config-min.nmake
index 1177d58..a6f8e16 100644
--- a/dist/etc/vc-8.0/config-min.nmake
+++ b/dist/etc/vc-8.0/config-min.nmake
@@ -111,6 +111,15 @@ XSDE_PARSER_VALIDATION = y
XSDE_SERIALIZER_VALIDATION = y
+# Set to 'y' if you would like to have support for regular expressions in
+# the XSD/e runtime. If the regexp support is enabled, then the parser and
+# serializer validation code will use it to validate the xs:pattern facet.
+# If the regexp support is disabled, then this facet will be ignored. The
+# regexp support increases the resulting executable size by about 30-50Kb.
+#
+XSDE_REGEXP = n
+
+
# Base parser/serializer implementation reuse style. Valid values are:
#
# 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin)
diff --git a/dist/etc/vc-9.0/config-max.nmake b/dist/etc/vc-9.0/config-max.nmake
index 6d39a73..8b6a71d 100644
--- a/dist/etc/vc-9.0/config-max.nmake
+++ b/dist/etc/vc-9.0/config-max.nmake
@@ -111,6 +111,15 @@ XSDE_PARSER_VALIDATION = y
XSDE_SERIALIZER_VALIDATION = y
+# Set to 'y' if you would like to have support for regular expressions in
+# the XSD/e runtime. If the regexp support is enabled, then the parser and
+# serializer validation code will use it to validate the xs:pattern facet.
+# If the regexp support is disabled, then this facet will be ignored. The
+# regexp support increases the resulting executable size by about 30-50Kb.
+#
+XSDE_REGEXP = n
+
+
# Base parser/serializer implementation reuse style. Valid values are:
#
# 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin)
diff --git a/dist/etc/vc-9.0/config-min.nmake b/dist/etc/vc-9.0/config-min.nmake
index 337369f..f33f298 100644
--- a/dist/etc/vc-9.0/config-min.nmake
+++ b/dist/etc/vc-9.0/config-min.nmake
@@ -111,6 +111,15 @@ XSDE_PARSER_VALIDATION = y
XSDE_SERIALIZER_VALIDATION = y
+# Set to 'y' if you would like to have support for regular expressions in
+# the XSD/e runtime. If the regexp support is enabled, then the parser and
+# serializer validation code will use it to validate the xs:pattern facet.
+# If the regexp support is disabled, then this facet will be ignored. The
+# regexp support increases the resulting executable size by about 30-50Kb.
+#
+XSDE_REGEXP = n
+
+
# Base parser/serializer implementation reuse style. Valid values are:
#
# 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin)
diff --git a/dist/etc/vxworks/config-5.5.1.make b/dist/etc/vxworks/config-5.5.1.make
index df7549e..c94dc3f 100644
--- a/dist/etc/vxworks/config-5.5.1.make
+++ b/dist/etc/vxworks/config-5.5.1.make
@@ -114,6 +114,15 @@ XSDE_PARSER_VALIDATION := y
XSDE_SERIALIZER_VALIDATION := y
+# Set to 'y' if you would like to have support for regular expressions in
+# the XSD/e runtime. If the regexp support is enabled, then the parser and
+# serializer validation code will use it to validate the xs:pattern facet.
+# If the regexp support is disabled, then this facet will be ignored. The
+# regexp support increases the resulting executable size by about 30-50Kb.
+#
+XSDE_REGEXP := n
+
+
# Base parser/serializer implementation reuse style. Valid values are:
#
# 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin)
diff --git a/dist/etc/vxworks/config-6.4-max.make b/dist/etc/vxworks/config-6.4-max.make
index 835ba84..1811b38 100644
--- a/dist/etc/vxworks/config-6.4-max.make
+++ b/dist/etc/vxworks/config-6.4-max.make
@@ -126,6 +126,15 @@ XSDE_PARSER_VALIDATION := y
XSDE_SERIALIZER_VALIDATION := y
+# Set to 'y' if you would like to have support for regular expressions in
+# the XSD/e runtime. If the regexp support is enabled, then the parser and
+# serializer validation code will use it to validate the xs:pattern facet.
+# If the regexp support is disabled, then this facet will be ignored. The
+# regexp support increases the resulting executable size by about 30-50Kb.
+#
+XSDE_REGEXP := n
+
+
# Base parser/serializer implementation reuse style. Valid values are:
#
# 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin)
diff --git a/dist/etc/vxworks/config-6.4-min.make b/dist/etc/vxworks/config-6.4-min.make
index 2ac5ef3..4116c10 100644
--- a/dist/etc/vxworks/config-6.4-min.make
+++ b/dist/etc/vxworks/config-6.4-min.make
@@ -113,6 +113,15 @@ XSDE_PARSER_VALIDATION := y
XSDE_SERIALIZER_VALIDATION := y
+# Set to 'y' if you would like to have support for regular expressions in
+# the XSD/e runtime. If the regexp support is enabled, then the parser and
+# serializer validation code will use it to validate the xs:pattern facet.
+# If the regexp support is disabled, then this facet will be ignored. The
+# regexp support increases the resulting executable size by about 30-50Kb.
+#
+XSDE_REGEXP := n
+
+
# Base parser/serializer implementation reuse style. Valid values are:
#
# 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin)
diff --git a/dist/etc/vxworks/config-6.7-max.make b/dist/etc/vxworks/config-6.7-max.make
index 30e7347..77ad171 100644
--- a/dist/etc/vxworks/config-6.7-max.make
+++ b/dist/etc/vxworks/config-6.7-max.make
@@ -126,6 +126,15 @@ XSDE_PARSER_VALIDATION := y
XSDE_SERIALIZER_VALIDATION := y
+# Set to 'y' if you would like to have support for regular expressions in
+# the XSD/e runtime. If the regexp support is enabled, then the parser and
+# serializer validation code will use it to validate the xs:pattern facet.
+# If the regexp support is disabled, then this facet will be ignored. The
+# regexp support increases the resulting executable size by about 30-50Kb.
+#
+XSDE_REGEXP := n
+
+
# Base parser/serializer implementation reuse style. Valid values are:
#
# 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin)
diff --git a/dist/etc/vxworks/config-6.7-min.make b/dist/etc/vxworks/config-6.7-min.make
index b624c8c..babc9f3 100644
--- a/dist/etc/vxworks/config-6.7-min.make
+++ b/dist/etc/vxworks/config-6.7-min.make
@@ -113,6 +113,15 @@ XSDE_PARSER_VALIDATION := y
XSDE_SERIALIZER_VALIDATION := y
+# Set to 'y' if you would like to have support for regular expressions in
+# the XSD/e runtime. If the regexp support is enabled, then the parser and
+# serializer validation code will use it to validate the xs:pattern facet.
+# If the regexp support is disabled, then this facet will be ignored. The
+# regexp support increases the resulting executable size by about 30-50Kb.
+#
+XSDE_REGEXP := n
+
+
# Base parser/serializer implementation reuse style. Valid values are:
#
# 'mixin' - virtual inheritance-based reuse (specify --reuse-style-mixin)
diff --git a/dist/libxsde/xsde/makefile b/dist/libxsde/xsde/makefile
index a134d29..1e3db26 100644
--- a/dist/libxsde/xsde/makefile
+++ b/dist/libxsde/xsde/makefile
@@ -9,6 +9,12 @@ EXTRA_CPPFLAGS := -I..
src := c/expat/xmlparse.c c/expat/xmlrole.c c/expat/xmltok.c
src += c/genx/genx.c c/genx/char-props.c
+ifneq ($(XSDE_PARSER_VALIDATION)$(XSDE_SERIALIZER_VALIDATION),nn)
+ifeq ($(XSDE_REGEXP),y)
+src += c/regexp/chvalid.c c/regexp/xmlunicode.c c/regexp/xmlregexp.c
+endif
+endif
+
ifeq ($(XSDE_CUSTOM_ALLOCATOR),y)
ifeq ($(XSDE_DEFAULT_ALLOCATOR),y)
src += allocator.c
@@ -563,6 +569,9 @@ endif
ifeq ($(XSDE_SERIALIZER_VALIDATION),y)
@echo $(h)define XSDE_SERIALIZER_VALIDATION >>$@
endif
+ifeq ($(XSDE_REGEXP),y)
+ @echo $(h)define XSDE_REGEXP >>$@
+endif
ifeq ($(XSDE_REUSE_STYLE),mixin)
@echo $(h)define XSDE_REUSE_STYLE_MIXIN >>$@
else
diff --git a/dist/libxsde/xsde/nmakefile b/dist/libxsde/xsde/nmakefile
index 9eb9aea..da40ece 100644
--- a/dist/libxsde/xsde/nmakefile
+++ b/dist/libxsde/xsde/nmakefile
@@ -11,6 +11,12 @@ EXTRA_CPPFLAGS = /I..
src = c\expat\xmlparse.c c\expat\xmlrole.c c\expat\xmltok.c
src = $(src) c\genx\genx.c c\genx\char-props.c
+!if "$(XSDE_PARSER_VALIDATION)$(XSDE_SERIALIZER_VALIDATION)" != "nn"
+!if "$(XSDE_REGEXP)" == "y"
+src = $(src) c\regexp\chvalid.c c\regexp\xmlunicode.c c\regexp\xmlregexp.c
+!endif
+!endif
+
!if "$(XSDE_CUSTOM_ALLOCATOR)" == "y"
!if "$(XSDE_DEFAULT_ALLOCATOR)" == "y"
src = $(src) allocator.c
@@ -562,7 +568,9 @@ config.h:
!if "$(XSDE_SERIALIZER_VALIDATION)" == "y"
@echo #define XSDE_SERIALIZER_VALIDATION >>$@
!endif
-
+!if "$(XSDE_REGEXP)" == "y"
+ @echo #define XSDE_REGEXP >>$@
+!endif
!if "$(XSDE_REUSE_STYLE)" == "mixin"
@echo #define XSDE_REUSE_STYLE_MIXIN >>$@
!else
@@ -619,6 +627,7 @@ cleanobj:
-del xsde.lib config.h
-del c\expat\*.obj
-del c\genx\*.obj
+ -del c\regexp\*.obj
-del cxx\*.obj
-del cxx\parser\*.obj
-del cxx\parser\expat\*.obj
diff --git a/dist/tests/cxx/hybrid/makefile b/dist/tests/cxx/hybrid/makefile
index 62bbc1e..9b5fea2 100644
--- a/dist/tests/cxx/hybrid/makefile
+++ b/dist/tests/cxx/hybrid/makefile
@@ -15,7 +15,7 @@ endif
endif
ifeq ($(XSDE_IOSTREAM),y)
-dirs += built-in default enumeration list test-template union
+dirs += built-in default enumeration list test-template union pattern
ifeq ($(XSDE_ENCODING),iso8859-1)
dirs += iso8859-1
diff --git a/dist/tests/cxx/hybrid/nmakefile b/dist/tests/cxx/hybrid/nmakefile
index 61097dd..560f0b1 100644
--- a/dist/tests/cxx/hybrid/nmakefile
+++ b/dist/tests/cxx/hybrid/nmakefile
@@ -15,7 +15,7 @@ dirs = $(dirs) iterator
!endif
!if "$(XSDE_IOSTREAM)" == "y"
-dirs = $(dirs) built-in default enumeration list test-template union
+dirs = $(dirs) built-in default enumeration list test-template union pattern
!if "$(XSDE_ENCODING)" == "iso8859-1"
dirs = $(dirs) iso8859-1
diff --git a/libxsde/xsde/c/regexp/COPYING b/libxsde/xsde/c/regexp/COPYING
new file mode 100644
index 0000000..9cedbe2
--- /dev/null
+++ b/libxsde/xsde/c/regexp/COPYING
@@ -0,0 +1,40 @@
+Copyright (c) 2009-2010 Code Synthesis Tools CC.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License version 2 as
+published by the Free Software Foundation.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+Original Version:
+
+Copyright (C) 1998-2003 Daniel Veillard.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is fur-
+nished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FIT-
+NESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CON-
+NECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+Except as contained in this notice, the name of Daniel Veillard shall not
+be used in advertising or otherwise to promote the sale, use or other deal-
+ings in this Software without prior written authorization from him.
diff --git a/libxsde/xsde/c/regexp/chvalid.c b/libxsde/xsde/c/regexp/chvalid.c
new file mode 100644
index 0000000..ad248ad
--- /dev/null
+++ b/libxsde/xsde/c/regexp/chvalid.c
@@ -0,0 +1,388 @@
+/*
+ * chvalid.c: this module implements the character range
+ * validation APIs
+ *
+ * This file is automatically generated from the cvs source
+ * definition files using the genChRanges.py Python script
+ *
+ * Generation date: Mon Mar 27 11:09:48 2006
+ * Sources: chvalid.def
+ */
+#include <xsde/c/pre.h>
+
+#include <stddef.h>
+#include "chvalid.h"
+
+/*
+ * The initial tables ({func_name}_tab) are used to validate whether a
+ * single-byte character is within the specified group. Each table
+ * contains 256 bytes, with each byte representing one of the 256
+ * possible characters. If the table byte is set, the character is
+ * allowed.
+ *
+ */
+static const unsigned char xmlIsPubidChar_tab[256] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00,
+ 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x01,
+ 0x01, 0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x00, 0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01,
+ 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00 };
+
+static const xmlChSRange xmlIsBaseChar_srng[] = { {0x100, 0x131},
+ {0x134, 0x13e}, {0x141, 0x148}, {0x14a, 0x17e}, {0x180, 0x1c3},
+ {0x1cd, 0x1f0}, {0x1f4, 0x1f5}, {0x1fa, 0x217}, {0x250, 0x2a8},
+ {0x2bb, 0x2c1}, {0x386, 0x386}, {0x388, 0x38a}, {0x38c, 0x38c},
+ {0x38e, 0x3a1}, {0x3a3, 0x3ce}, {0x3d0, 0x3d6}, {0x3da, 0x3da},
+ {0x3dc, 0x3dc}, {0x3de, 0x3de}, {0x3e0, 0x3e0}, {0x3e2, 0x3f3},
+ {0x401, 0x40c}, {0x40e, 0x44f}, {0x451, 0x45c}, {0x45e, 0x481},
+ {0x490, 0x4c4}, {0x4c7, 0x4c8}, {0x4cb, 0x4cc}, {0x4d0, 0x4eb},
+ {0x4ee, 0x4f5}, {0x4f8, 0x4f9}, {0x531, 0x556}, {0x559, 0x559},
+ {0x561, 0x586}, {0x5d0, 0x5ea}, {0x5f0, 0x5f2}, {0x621, 0x63a},
+ {0x641, 0x64a}, {0x671, 0x6b7}, {0x6ba, 0x6be}, {0x6c0, 0x6ce},
+ {0x6d0, 0x6d3}, {0x6d5, 0x6d5}, {0x6e5, 0x6e6}, {0x905, 0x939},
+ {0x93d, 0x93d}, {0x958, 0x961}, {0x985, 0x98c}, {0x98f, 0x990},
+ {0x993, 0x9a8}, {0x9aa, 0x9b0}, {0x9b2, 0x9b2}, {0x9b6, 0x9b9},
+ {0x9dc, 0x9dd}, {0x9df, 0x9e1}, {0x9f0, 0x9f1}, {0xa05, 0xa0a},
+ {0xa0f, 0xa10}, {0xa13, 0xa28}, {0xa2a, 0xa30}, {0xa32, 0xa33},
+ {0xa35, 0xa36}, {0xa38, 0xa39}, {0xa59, 0xa5c}, {0xa5e, 0xa5e},
+ {0xa72, 0xa74}, {0xa85, 0xa8b}, {0xa8d, 0xa8d}, {0xa8f, 0xa91},
+ {0xa93, 0xaa8}, {0xaaa, 0xab0}, {0xab2, 0xab3}, {0xab5, 0xab9},
+ {0xabd, 0xabd}, {0xae0, 0xae0}, {0xb05, 0xb0c}, {0xb0f, 0xb10},
+ {0xb13, 0xb28}, {0xb2a, 0xb30}, {0xb32, 0xb33}, {0xb36, 0xb39},
+ {0xb3d, 0xb3d}, {0xb5c, 0xb5d}, {0xb5f, 0xb61}, {0xb85, 0xb8a},
+ {0xb8e, 0xb90}, {0xb92, 0xb95}, {0xb99, 0xb9a}, {0xb9c, 0xb9c},
+ {0xb9e, 0xb9f}, {0xba3, 0xba4}, {0xba8, 0xbaa}, {0xbae, 0xbb5},
+ {0xbb7, 0xbb9}, {0xc05, 0xc0c}, {0xc0e, 0xc10}, {0xc12, 0xc28},
+ {0xc2a, 0xc33}, {0xc35, 0xc39}, {0xc60, 0xc61}, {0xc85, 0xc8c},
+ {0xc8e, 0xc90}, {0xc92, 0xca8}, {0xcaa, 0xcb3}, {0xcb5, 0xcb9},
+ {0xcde, 0xcde}, {0xce0, 0xce1}, {0xd05, 0xd0c}, {0xd0e, 0xd10},
+ {0xd12, 0xd28}, {0xd2a, 0xd39}, {0xd60, 0xd61}, {0xe01, 0xe2e},
+ {0xe30, 0xe30}, {0xe32, 0xe33}, {0xe40, 0xe45}, {0xe81, 0xe82},
+ {0xe84, 0xe84}, {0xe87, 0xe88}, {0xe8a, 0xe8a}, {0xe8d, 0xe8d},
+ {0xe94, 0xe97}, {0xe99, 0xe9f}, {0xea1, 0xea3}, {0xea5, 0xea5},
+ {0xea7, 0xea7}, {0xeaa, 0xeab}, {0xead, 0xeae}, {0xeb0, 0xeb0},
+ {0xeb2, 0xeb3}, {0xebd, 0xebd}, {0xec0, 0xec4}, {0xf40, 0xf47},
+ {0xf49, 0xf69}, {0x10a0, 0x10c5}, {0x10d0, 0x10f6}, {0x1100, 0x1100},
+ {0x1102, 0x1103}, {0x1105, 0x1107}, {0x1109, 0x1109}, {0x110b, 0x110c},
+ {0x110e, 0x1112}, {0x113c, 0x113c}, {0x113e, 0x113e}, {0x1140, 0x1140},
+ {0x114c, 0x114c}, {0x114e, 0x114e}, {0x1150, 0x1150}, {0x1154, 0x1155},
+ {0x1159, 0x1159}, {0x115f, 0x1161}, {0x1163, 0x1163}, {0x1165, 0x1165},
+ {0x1167, 0x1167}, {0x1169, 0x1169}, {0x116d, 0x116e}, {0x1172, 0x1173},
+ {0x1175, 0x1175}, {0x119e, 0x119e}, {0x11a8, 0x11a8}, {0x11ab, 0x11ab},
+ {0x11ae, 0x11af}, {0x11b7, 0x11b8}, {0x11ba, 0x11ba}, {0x11bc, 0x11c2},
+ {0x11eb, 0x11eb}, {0x11f0, 0x11f0}, {0x11f9, 0x11f9}, {0x1e00, 0x1e9b},
+ {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45},
+ {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, {0x1f59, 0x1f59}, {0x1f5b, 0x1f5b},
+ {0x1f5d, 0x1f5d}, {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, {0x1fb6, 0x1fbc},
+ {0x1fbe, 0x1fbe}, {0x1fc2, 0x1fc4}, {0x1fc6, 0x1fcc}, {0x1fd0, 0x1fd3},
+ {0x1fd6, 0x1fdb}, {0x1fe0, 0x1fec}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffc},
+ {0x2126, 0x2126}, {0x212a, 0x212b}, {0x212e, 0x212e}, {0x2180, 0x2182},
+ {0x3041, 0x3094}, {0x30a1, 0x30fa}, {0x3105, 0x312c}, {0xac00, 0xd7a3}};
+
+static const xmlChRangeGroup xmlIsBaseCharGroup =
+ {197, 0, xmlIsBaseChar_srng, (xmlChLRangePtr)0};
+
+static const xmlChSRange xmlIsChar_srng[] = { {0x100, 0xd7ff},
+ {0xe000, 0xfffd}};
+static const xmlChLRange xmlIsChar_lrng[] = { {0x10000, 0x10ffff}};
+
+static const xmlChRangeGroup xmlIsCharGroup =
+ {2, 1, xmlIsChar_srng, xmlIsChar_lrng};
+
+static const xmlChSRange xmlIsCombining_srng[] = { {0x300, 0x345},
+ {0x360, 0x361}, {0x483, 0x486}, {0x591, 0x5a1}, {0x5a3, 0x5b9},
+ {0x5bb, 0x5bd}, {0x5bf, 0x5bf}, {0x5c1, 0x5c2}, {0x5c4, 0x5c4},
+ {0x64b, 0x652}, {0x670, 0x670}, {0x6d6, 0x6dc}, {0x6dd, 0x6df},
+ {0x6e0, 0x6e4}, {0x6e7, 0x6e8}, {0x6ea, 0x6ed}, {0x901, 0x903},
+ {0x93c, 0x93c}, {0x93e, 0x94c}, {0x94d, 0x94d}, {0x951, 0x954},
+ {0x962, 0x963}, {0x981, 0x983}, {0x9bc, 0x9bc}, {0x9be, 0x9be},
+ {0x9bf, 0x9bf}, {0x9c0, 0x9c4}, {0x9c7, 0x9c8}, {0x9cb, 0x9cd},
+ {0x9d7, 0x9d7}, {0x9e2, 0x9e3}, {0xa02, 0xa02}, {0xa3c, 0xa3c},
+ {0xa3e, 0xa3e}, {0xa3f, 0xa3f}, {0xa40, 0xa42}, {0xa47, 0xa48},
+ {0xa4b, 0xa4d}, {0xa70, 0xa71}, {0xa81, 0xa83}, {0xabc, 0xabc},
+ {0xabe, 0xac5}, {0xac7, 0xac9}, {0xacb, 0xacd}, {0xb01, 0xb03},
+ {0xb3c, 0xb3c}, {0xb3e, 0xb43}, {0xb47, 0xb48}, {0xb4b, 0xb4d},
+ {0xb56, 0xb57}, {0xb82, 0xb83}, {0xbbe, 0xbc2}, {0xbc6, 0xbc8},
+ {0xbca, 0xbcd}, {0xbd7, 0xbd7}, {0xc01, 0xc03}, {0xc3e, 0xc44},
+ {0xc46, 0xc48}, {0xc4a, 0xc4d}, {0xc55, 0xc56}, {0xc82, 0xc83},
+ {0xcbe, 0xcc4}, {0xcc6, 0xcc8}, {0xcca, 0xccd}, {0xcd5, 0xcd6},
+ {0xd02, 0xd03}, {0xd3e, 0xd43}, {0xd46, 0xd48}, {0xd4a, 0xd4d},
+ {0xd57, 0xd57}, {0xe31, 0xe31}, {0xe34, 0xe3a}, {0xe47, 0xe4e},
+ {0xeb1, 0xeb1}, {0xeb4, 0xeb9}, {0xebb, 0xebc}, {0xec8, 0xecd},
+ {0xf18, 0xf19}, {0xf35, 0xf35}, {0xf37, 0xf37}, {0xf39, 0xf39},
+ {0xf3e, 0xf3e}, {0xf3f, 0xf3f}, {0xf71, 0xf84}, {0xf86, 0xf8b},
+ {0xf90, 0xf95}, {0xf97, 0xf97}, {0xf99, 0xfad}, {0xfb1, 0xfb7},
+ {0xfb9, 0xfb9}, {0x20d0, 0x20dc}, {0x20e1, 0x20e1}, {0x302a, 0x302f},
+ {0x3099, 0x3099}, {0x309a, 0x309a}};
+
+static const xmlChRangeGroup xmlIsCombiningGroup =
+ {95, 0, xmlIsCombining_srng, (xmlChLRangePtr)0};
+
+static const xmlChSRange xmlIsDigit_srng[] = { {0x660, 0x669},
+ {0x6f0, 0x6f9}, {0x966, 0x96f}, {0x9e6, 0x9ef}, {0xa66, 0xa6f},
+ {0xae6, 0xaef}, {0xb66, 0xb6f}, {0xbe7, 0xbef}, {0xc66, 0xc6f},
+ {0xce6, 0xcef}, {0xd66, 0xd6f}, {0xe50, 0xe59}, {0xed0, 0xed9},
+ {0xf20, 0xf29}};
+
+static const xmlChRangeGroup xmlIsDigitGroup =
+ {14, 0, xmlIsDigit_srng, (xmlChLRangePtr)0};
+
+static const xmlChSRange xmlIsExtender_srng[] = { {0x2d0, 0x2d0},
+ {0x2d1, 0x2d1}, {0x387, 0x387}, {0x640, 0x640}, {0xe46, 0xe46},
+ {0xec6, 0xec6}, {0x3005, 0x3005}, {0x3031, 0x3035}, {0x309d, 0x309e},
+ {0x30fc, 0x30fe}};
+
+static const xmlChRangeGroup xmlIsExtenderGroup =
+ {10, 0, xmlIsExtender_srng, (xmlChLRangePtr)0};
+
+static const xmlChSRange xmlIsIdeographic_srng[] = { {0x3007, 0x3007},
+ {0x3021, 0x3029}, {0x4e00, 0x9fa5}};
+
+static const xmlChRangeGroup xmlIsIdeographicGroup =
+ {3, 0, xmlIsIdeographic_srng, (xmlChLRangePtr)0};
+
+/**
+ * xmlCharInRange:
+ * @val: code point to look up
+ * @rptr: range group to search (may be NULL)
+ *
+ * Binary-searches one of the group's range tables for @val. Code points
+ * below 0x10000 are looked up in the 16-bit "short" table, everything
+ * else in the "long" table. Range bounds are inclusive and the search
+ * relies on each table being sorted in ascending order.
+ *
+ * Returns: 1 if @val lies inside one of the ranges, 0 otherwise (also
+ * when @rptr is NULL or the relevant table is empty)
+ */
+int
+xmlCharInRange (unsigned int val, const xmlChRangeGroup *rptr) {
+    int first, last, probe;
+
+    if (rptr == NULL)
+        return 0;
+
+    if (val >= 0x10000) {
+        /* Wide code points: search the 32-bit table. */
+        const xmlChLRange *wide = rptr->longRange;
+
+        first = 0;
+        last = rptr->nbLongRange - 1;   /* empty table: loop is skipped */
+        while (first <= last) {
+            probe = (first + last) / 2;
+            if (val < wide[probe].low)
+                last = probe - 1;
+            else if (val > wide[probe].high)
+                first = probe + 1;
+            else
+                return 1;
+        }
+    } else {
+        /* Code points that fit in 16 bits: search the compact table. */
+        const xmlChSRange *narrow = rptr->shortRange;
+        const unsigned short key = (unsigned short) val;
+
+        first = 0;
+        last = rptr->nbShortRange - 1;  /* empty table: loop is skipped */
+        while (first <= last) {
+            probe = (first + last) / 2;
+            if (key < narrow[probe].low)
+                last = probe - 1;
+            else if (key > narrow[probe].high)
+                first = probe + 1;
+            else
+                return 1;
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * xmlIsBaseChar_ch:
+ * @c: char to validate
+ *
+ * Automatically generated by genChRanges.py
+ */
+#define xmlIsBaseChar_ch(c) (((0x41 <= (c)) && ((c) <= 0x5a)) || \
+ ((0x61 <= (c)) && ((c) <= 0x7a)) || \
+ ((0xc0 <= (c)) && ((c) <= 0xd6)) || \
+ ((0xd8 <= (c)) && ((c) <= 0xf6)) || \
+ (0xf8 <= (c)))
+
+/**
+ * xmlIsBaseCharQ:
+ * @c: char to validate
+ *
+ * Automatically generated by genChRanges.py
+ */
+#define xmlIsBaseCharQ(c) (((c) < 0x100) ? \
+ xmlIsBaseChar_ch((c)) : \
+ xmlCharInRange((c), &xmlIsBaseCharGroup))
+
+/**
+ * xmlIsBlank_ch:
+ * @c: char to validate
+ *
+ * Automatically generated by genChRanges.py
+ */
+#define xmlIsBlank_ch(c) (((c) == 0x20) || \
+ ((0x9 <= (c)) && ((c) <= 0xa)) || \
+ ((c) == 0xd))
+
+/**
+ * xmlIsBlankQ:
+ * @c: char to validate
+ *
+ * Automatically generated by genChRanges.py
+ */
+#define xmlIsBlankQ(c) (((c) < 0x100) ? \
+ xmlIsBlank_ch((c)) : 0)
+
+
+/**
+ * xmlIsChar_ch:
+ * @c: char to validate
+ *
+ * Automatically generated by genChRanges.py
+ */
+#define xmlIsChar_ch(c) (((0x9 <= (c)) && ((c) <= 0xa)) || \
+ ((c) == 0xd) || \
+ (0x20 <= (c)))
+
+/**
+ * xmlIsCharQ:
+ * @c: char to validate
+ *
+ * Automatically generated by genChRanges.py
+ */
+#define xmlIsCharQ(c) (((c) < 0x100) ? \
+ xmlIsChar_ch((c)) :\
+ (((0x100 <= (c)) && ((c) <= 0xd7ff)) || \
+ ((0xe000 <= (c)) && ((c) <= 0xfffd)) || \
+ ((0x10000 <= (c)) && ((c) <= 0x10ffff))))
+
+/**
+ * xmlIsCombiningQ:
+ * @c: char to validate
+ *
+ * Automatically generated by genChRanges.py
+ */
+#define xmlIsCombiningQ(c) (((c) < 0x100) ? \
+ 0 : \
+ xmlCharInRange((c), &xmlIsCombiningGroup))
+
+/**
+ * xmlIsDigit_ch:
+ * @c: char to validate
+ *
+ * Automatically generated by genChRanges.py
+ */
+#define xmlIsDigit_ch(c) (((0x30 <= (c)) && ((c) <= 0x39)))
+
+/**
+ * xmlIsDigitQ:
+ * @c: char to validate
+ *
+ * Automatically generated by genChRanges.py
+ */
+#define xmlIsDigitQ(c) (((c) < 0x100) ? \
+ xmlIsDigit_ch((c)) : \
+ xmlCharInRange((c), &xmlIsDigitGroup))
+
+/**
+ * xmlIsExtender_ch:
+ * @c: char to validate
+ *
+ * Automatically generated by genChRanges.py
+ */
+#define xmlIsExtender_ch(c) (((c) == 0xb7))
+
+/**
+ * xmlIsExtenderQ:
+ * @c: char to validate
+ *
+ * Automatically generated by genChRanges.py
+ */
+#define xmlIsExtenderQ(c) (((c) < 0x100) ? \
+ xmlIsExtender_ch((c)) : \
+ xmlCharInRange((c), &xmlIsExtenderGroup))
+
+/**
+ * xmlIsIdeographicQ:
+ * @c: char to validate
+ *
+ * Automatically generated by genChRanges.py
+ */
+#define xmlIsIdeographicQ(c) (((c) < 0x100) ? \
+ 0 :\
+ (((0x4e00 <= (c)) && ((c) <= 0x9fa5)) || \
+ ((c) == 0x3007) || \
+ ((0x3021 <= (c)) && ((c) <= 0x3029))))
+
+/**
+ * xmlIsPubidChar_ch:
+ * @c: char to validate
+ *
+ * Automatically generated by genChRanges.py
+ */
+#define xmlIsPubidChar_ch(c) (xmlIsPubidChar_tab[(c)])
+
+/**
+ * xmlIsPubidCharQ:
+ * @c: char to validate
+ *
+ * Automatically generated by genChRanges.py
+ */
+#define xmlIsPubidCharQ(c) (((c) < 0x100) ? \
+ xmlIsPubidChar_ch((c)) : 0)
+
+/* Non-zero if c is a digit: 0x30-0x39 for values below 0x100, otherwise
+ * a lookup in xmlIsDigitGroup. */
+int
+xmlIsDigit (int c)
+{
+  if (c < 0x100)
+    return xmlIsDigit_ch (c);
+
+  return xmlCharInRange (c, &xmlIsDigitGroup);
+}
+
+/* Returns 1 if c is either a base character or an ideographic
+ * character (as defined by the macros above), 0 otherwise. */
+int
+xmlIsLetter (int c)
+{
+  if (xmlIsBaseCharQ (c))
+    return 1;
+
+  return xmlIsIdeographicQ (c) ? 1 : 0;
+}
+
+/* Non-zero if c is a combining character. No value below 0x100 is one;
+ * everything else is looked up in xmlIsCombiningGroup. */
+int
+xmlIsCombining (int c)
+{
+  if (c < 0x100)
+    return 0;
+
+  return xmlCharInRange (c, &xmlIsCombiningGroup);
+}
+
+/* Non-zero if c is an extender: 0xb7 for values below 0x100, otherwise
+ * a lookup in xmlIsExtenderGroup. */
+int
+xmlIsExtender (int c)
+{
+  if (c < 0x100)
+    return xmlIsExtender_ch (c);
+
+  return xmlCharInRange (c, &xmlIsExtenderGroup);
+}
+
+/* Returns 1 if c is an allowed character: tab, LF, CR or anything from
+ * 0x20 up for values below 0x100; otherwise 0x100-0xd7ff,
+ * 0xe000-0xfffd or 0x10000-0x10ffff. Returns 0 for everything else. */
+int
+xmlIsChar (int c)
+{
+  if (c < 0x100)
+    return xmlIsChar_ch (c);
+
+  if (c <= 0xd7ff)
+    return 1;
+
+  if ((0xe000 <= c) && (c <= 0xfffd))
+    return 1;
+
+  return ((0x10000 <= c) && (c <= 0x10ffff));
+}
+
+#include <xsde/c/post.h>
diff --git a/libxsde/xsde/c/regexp/chvalid.h b/libxsde/xsde/c/regexp/chvalid.h
new file mode 100644
index 0000000..9865350
--- /dev/null
+++ b/libxsde/xsde/c/regexp/chvalid.h
@@ -0,0 +1,63 @@
+/*
+ * Summary: Unicode character range checking
+ * Description: this module exports interfaces for the character
+ * range validation APIs
+ *
+ * This file is automatically generated from the cvs source
+ * definition files using the genChRanges.py Python script
+ *
+ * Generation date: Mon Mar 27 11:09:48 2006
+ * Sources: chvalid.def
+ */
+
+#ifndef __XML_CHVALID_H__
+#define __XML_CHVALID_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Define our typedefs and structures
+ *
+ */
+/*
+ * One range of code points whose bounds fit in 16 bits. xmlCharInRange
+ * consults these only for values below 0x10000 and treats both bounds
+ * as inclusive.
+ */
+typedef struct _xmlChSRange xmlChSRange;
+typedef xmlChSRange *xmlChSRangePtr;
+struct _xmlChSRange {
+ unsigned short low; /* first code point in the range */
+ unsigned short high; /* last code point in the range */
+};
+
+/*
+ * One range of code points with full-width bounds. xmlCharInRange
+ * consults these for values of 0x10000 and above and treats both
+ * bounds as inclusive.
+ */
+typedef struct _xmlChLRange xmlChLRange;
+typedef xmlChLRange *xmlChLRangePtr;
+struct _xmlChLRange {
+ unsigned int low; /* first code point in the range */
+ unsigned int high; /* last code point in the range */
+};
+
+/*
+ * A character class expressed as two range tables, one for code points
+ * below 0x10000 and one for the rest. xmlCharInRange binary-searches
+ * the tables, so the ranges in each must be in ascending order.
+ */
+typedef struct _xmlChRangeGroup xmlChRangeGroup;
+typedef xmlChRangeGroup *xmlChRangeGroupPtr;
+struct _xmlChRangeGroup {
+ int nbShortRange; /* number of entries in shortRange */
+ int nbLongRange; /* number of entries in longRange */
+ const xmlChSRange *shortRange; /* points to an array of ranges */
+ const xmlChLRange *longRange; /* array of wide ranges; not read when nbLongRange is 0 */
+};
+
+
+
+/**
+ * Range checking routine
+ */
+int xmlCharInRange(unsigned int val, const xmlChRangeGroup *group);
+
+int xmlIsDigit (int c);
+int xmlIsLetter (int c);
+int xmlIsCombining (int c);
+int xmlIsExtender (int c);
+int xmlIsChar (int c);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* __XML_CHVALID_H__ */
diff --git a/libxsde/xsde/c/regexp/xmlregexp.c b/libxsde/xsde/c/regexp/xmlregexp.c
new file mode 100644
index 0000000..ad493ce
--- /dev/null
+++ b/libxsde/xsde/c/regexp/xmlregexp.c
@@ -0,0 +1,4375 @@
+/*
+ * regexp.c: generic and extensible Regular Expression engine
+ *
+ * Basically designed with the purpose of compiling regexps for
+ * the variety of validation/schema mechanisms now available in
+ * XML-related specifications; these include:
+ * - XML-1.0 DTD validation
+ * - XML Schemas structure part 1
+ * - XML Schemas Datatypes part 2 especially Appendix F
+ * - RELAX-NG/TREX i.e. the counter proposal
+ *
+ * See COPYING for the status of this software.
+ */
+#include <xsde/c/pre.h>
+
+#include <string.h>
+#include <stdlib.h>
+
+#include <xsde/config.h>
+
+#ifdef XSDE_CUSTOM_ALLOCATOR
+# include <xsde/allocator.h>
+#endif
+
+#include "xmlregexp.h"
+#include "xmlunicode.h"
+#include "chvalid.h"
+
+#ifndef INT_MAX
+#define INT_MAX 123456789 /* easy to flag and big enough for our needs */
+#endif
+
+#define MAX_PUSH 10000000
+
+/*
+ * Error codes stored in the parser context's error field.
+ */
+typedef enum
+{
+ XML_REGEXP_ERROR_NONE,
+ XML_REGEXP_ERROR_MEMORY, /* set by xmlRegexpErrMemory */
+ XML_REGEXP_ERROR_COMPILE /* set by xmlRegexpErrCompile */
+} xmlRegexpError;
+
+#define REGEXP_ERROR(str) xmlRegexpErrCompile(ctxt, str);
+
+#ifdef XSDE_CUSTOM_ALLOCATOR
+#define REGEXP_MALLOC(n) xsde_alloc(n)
+#define REGEXP_REALLOC(p, n) xsde_realloc(p, n)
+#define REGEXP_FREE(p) xsde_free(p)
+#else
+#define REGEXP_MALLOC(n) malloc(n)
+#define REGEXP_REALLOC(p, n) realloc(p, n)
+#define REGEXP_FREE(p) free(p)
+#endif
+
+#define NEXT ctxt->cur++
+#define CUR (*(ctxt->cur))
+#define NXT(index) (ctxt->cur[index])
+
+#define CUR_SCHAR(s, l) xmlStringCurrentChar(s, &l)
+#define NEXTL(l) ctxt->cur += l;
+#define XML_REG_STRING_SEPARATOR '|'
+
+/*
+ * Need PREV to check on a '-' within a Character Group. May only be used
+ * when it's guaranteed that cur is not at the beginning of ctxt->string!
+ */
+#define PREV (ctxt->cur[-1])
+
+
+/************************************************************************
+ * *
+ * Datatypes and structures *
+ * *
+ ************************************************************************/
+
+/*
+ * Note: the order of the enums below is significant, do not shuffle
+ */
+typedef enum {
+ XML_REGEXP_EPSILON = 1,
+ XML_REGEXP_CHARVAL,
+ XML_REGEXP_RANGES,
+ XML_REGEXP_SUBREG, /* used for () sub regexps */
+ XML_REGEXP_STRING,
+ XML_REGEXP_ANYCHAR, /* . */
+ XML_REGEXP_ANYSPACE, /* \s */
+ XML_REGEXP_NOTSPACE, /* \S */
+ XML_REGEXP_INITNAME, /* \l */
+ XML_REGEXP_NOTINITNAME, /* \L */
+ XML_REGEXP_NAMECHAR, /* \c */
+ XML_REGEXP_NOTNAMECHAR, /* \C */
+ XML_REGEXP_DECIMAL, /* \d */
+ XML_REGEXP_NOTDECIMAL, /* \D */
+ XML_REGEXP_REALCHAR, /* \w */
+ XML_REGEXP_NOTREALCHAR, /* \W */
+ XML_REGEXP_LETTER = 100,
+ XML_REGEXP_LETTER_UPPERCASE,
+ XML_REGEXP_LETTER_LOWERCASE,
+ XML_REGEXP_LETTER_TITLECASE,
+ XML_REGEXP_LETTER_MODIFIER,
+ XML_REGEXP_LETTER_OTHERS,
+ XML_REGEXP_MARK,
+ XML_REGEXP_MARK_NONSPACING,
+ XML_REGEXP_MARK_SPACECOMBINING,
+ XML_REGEXP_MARK_ENCLOSING,
+ XML_REGEXP_NUMBER,
+ XML_REGEXP_NUMBER_DECIMAL,
+ XML_REGEXP_NUMBER_LETTER,
+ XML_REGEXP_NUMBER_OTHERS,
+ XML_REGEXP_PUNCT,
+ XML_REGEXP_PUNCT_CONNECTOR,
+ XML_REGEXP_PUNCT_DASH,
+ XML_REGEXP_PUNCT_OPEN,
+ XML_REGEXP_PUNCT_CLOSE,
+ XML_REGEXP_PUNCT_INITQUOTE,
+ XML_REGEXP_PUNCT_FINQUOTE,
+ XML_REGEXP_PUNCT_OTHERS,
+ XML_REGEXP_SEPAR,
+ XML_REGEXP_SEPAR_SPACE,
+ XML_REGEXP_SEPAR_LINE,
+ XML_REGEXP_SEPAR_PARA,
+ XML_REGEXP_SYMBOL,
+ XML_REGEXP_SYMBOL_MATH,
+ XML_REGEXP_SYMBOL_CURRENCY,
+ XML_REGEXP_SYMBOL_MODIFIER,
+ XML_REGEXP_SYMBOL_OTHERS,
+ XML_REGEXP_OTHER,
+ XML_REGEXP_OTHER_CONTROL,
+ XML_REGEXP_OTHER_FORMAT,
+ XML_REGEXP_OTHER_PRIVATE,
+ XML_REGEXP_OTHER_NA,
+ XML_REGEXP_BLOCK_NAME
+} xmlRegAtomType;
+
+typedef enum {
+ XML_REGEXP_QUANT_EPSILON = 1,
+ XML_REGEXP_QUANT_ONCE,
+ XML_REGEXP_QUANT_OPT,
+ XML_REGEXP_QUANT_MULT,
+ XML_REGEXP_QUANT_PLUS,
+ XML_REGEXP_QUANT_ONCEONLY,
+ XML_REGEXP_QUANT_ALL,
+ XML_REGEXP_QUANT_RANGE
+} xmlRegQuantType;
+
+typedef enum {
+ XML_REGEXP_START_STATE = 1,
+ XML_REGEXP_FINAL_STATE,
+ XML_REGEXP_TRANS_STATE,
+ XML_REGEXP_SINK_STATE,
+ XML_REGEXP_UNREACH_STATE
+} xmlRegStateType;
+
+typedef enum {
+ XML_REGEXP_MARK_NORMAL = 0,
+ XML_REGEXP_MARK_START,
+ XML_REGEXP_MARK_VISITED
+} xmlRegMarkedType;
+
+typedef struct _xmlRegRange xmlRegRange;
+typedef xmlRegRange *xmlRegRangePtr;
+
+struct _xmlRegRange {
+ int neg; /* 0 normal, 1 not, 2 exclude */
+ xmlRegAtomType type;
+ int start;
+ int end;
+ xmlChar *blockName;
+};
+
+typedef struct _xmlRegAtom xmlRegAtom;
+typedef xmlRegAtom *xmlRegAtomPtr;
+
+typedef struct _xmlAutomataState xmlRegState;
+typedef xmlRegState *xmlRegStatePtr;
+
+struct _xmlRegAtom {
+ int no;
+ xmlRegAtomType type;
+ xmlRegQuantType quant;
+ int min;
+ int max;
+
+ void *valuep;
+ void *valuep2;
+ int neg;
+ int codepoint;
+ xmlRegStatePtr start;
+ xmlRegStatePtr start0;
+ xmlRegStatePtr stop;
+ int maxRanges;
+ int nbRanges;
+ xmlRegRangePtr *ranges;
+ void *data;
+};
+
+typedef struct _xmlRegCounter xmlRegCounter;
+typedef xmlRegCounter *xmlRegCounterPtr;
+
+struct _xmlRegCounter {
+ int min;
+ int max;
+};
+
+typedef struct _xmlRegTrans xmlRegTrans;
+typedef xmlRegTrans *xmlRegTransPtr;
+
+struct _xmlRegTrans {
+ xmlRegAtomPtr atom;
+ int to;
+ int counter;
+ int count;
+ int nd;
+};
+
+typedef struct _xmlAutomataState xmlAutomataState;
+typedef xmlAutomataState *xmlAutomataStatePtr;
+
+/*
+ * One state of the automaton together with its outgoing transitions.
+ */
+struct _xmlAutomataState {
+ xmlRegStateType type; /* kind of state; copied into the compact table */
+ xmlRegMarkedType mark;
+ xmlRegMarkedType reached;
+ int no;
+ int maxTrans; /* presumably the allocated size of trans - TODO confirm */
+ int nbTrans; /* number of entries in use in trans */
+ xmlRegTrans *trans; /* outgoing transitions */
+ /* knowing states pointing to us can speed things up */
+ int maxTransTo;
+ int nbTransTo;
+ int *transTo;
+};
+
+typedef struct _xmlAutomata xmlRegParserCtxt;
+typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
+
+#define AM_AUTOMATA_RNG 1
+
+typedef struct _xmlAutomata xmlAutomata;
+typedef xmlAutomata *xmlAutomataPtr;
+
+struct _xmlAutomata {
+ xmlChar *string;
+ xmlChar *cur;
+
+ int error;
+ const char *error_str;
+ int neg;
+
+ xmlRegStatePtr start;
+ xmlRegStatePtr end;
+ xmlRegStatePtr state;
+
+ xmlRegAtomPtr atom;
+
+ int maxAtoms;
+ int nbAtoms;
+ xmlRegAtomPtr *atoms;
+
+ int maxStates;
+ int nbStates;
+ xmlRegStatePtr *states;
+
+ int maxCounters;
+ int nbCounters;
+ xmlRegCounter *counters;
+
+ int determinist;
+ int negs;
+ int flags;
+};
+
+/*
+ * A compiled regular expression as produced by xmlRegEpxFromParse. The
+ * string, states, atoms and counters are taken over from the parser
+ * context, which has its own pointers reset afterwards.
+ */
+struct _xmlRegexp {
+ xmlChar *string;
+ int nbStates;
+ xmlRegStatePtr *states; /* freed and set to NULL once the compact form is built */
+ int nbAtoms;
+ xmlRegAtomPtr *atoms; /* freed and set to NULL once the compact form is built */
+ int nbCounters;
+ xmlRegCounter *counters;
+ int determinist;
+ int flags;
+ /*
+ * That's the compact form for deterministic automata
+ */
+ int nbstates; /* number of non-NULL states kept in the compact table */
+ int *compact; /* rows of (nbstrings + 1) ints: column 0 is the state type,
+ column (string index + 1) is the target state + 1, 0 if none */
+ void **transdata; /* nbstates x nbstrings table of atom data; NULL if no atom carries data */
+ int nbstrings; /* number of entries in stringMap */
+ xmlChar **stringMap; /* the distinct atom strings, indexed by compact column - 1 */
+};
+
+typedef struct _xmlRegExecRollback xmlRegExecRollback;
+typedef xmlRegExecRollback *xmlRegExecRollbackPtr;
+
+struct _xmlRegExecRollback {
+ xmlRegStatePtr state;/* the current state */
+ int index; /* the index in the input stack */
+ int nextbranch; /* the next transition to explore in that state */
+ int *counts; /* save the automata state if it has some */
+};
+
+typedef struct _xmlRegInputToken xmlRegInputToken;
+typedef xmlRegInputToken *xmlRegInputTokenPtr;
+
+struct _xmlRegInputToken {
+ xmlChar *value;
+ void *data;
+};
+
+typedef struct _xmlRegExecCtxt xmlRegExecCtxt;
+typedef xmlRegExecCtxt *xmlRegExecCtxtPtr;
+
+typedef void (*xmlRegExecCallbacks) (xmlRegExecCtxtPtr exec,
+ const xmlChar *token,
+ void *transdata,
+ void *inputdata);
+
+struct _xmlRegExecCtxt {
+ int status; /* execution status != 0 indicate an error */
+ int determinist; /* did we find an indeterministic behaviour */
+ xmlRegexpPtr comp; /* the compiled regexp */
+ xmlRegExecCallbacks callback;
+ void *data;
+
+ xmlRegStatePtr state;/* the current state */
+ int transno; /* the current transition on that state */
+ int transcount; /* the number of chars in char counted transitions */
+
+ /*
+ * A stack of rollback states
+ */
+ int maxRollbacks;
+ int nbRollbacks;
+ xmlRegExecRollback *rollbacks;
+
+ /*
+ * The state of the automata if any
+ */
+ int *counts;
+
+ /*
+ * The input stack
+ */
+ int inputStackMax;
+ int inputStackNr;
+ int index;
+ int *charStack;
+ const xmlChar *inputString; /* when operating on characters */
+ xmlRegInputTokenPtr inputStack;/* when operating on strings */
+
+ /*
+ * error handling
+ */
+ int errStateNo; /* the error state number */
+ xmlRegStatePtr errState; /* the error state */
+ xmlChar *errString; /* the string raising the error */
+ int *errCounts; /* counters at the error state */
+ int nbPush;
+};
+
+#define REGEXP_ALL_COUNTER 0x123456
+#define REGEXP_ALL_LAX_COUNTER 0x123457
+
+static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
+static void xmlRegFreeState(xmlRegStatePtr state);
+static void xmlRegFreeAtom(xmlRegAtomPtr atom);
+static int xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr);
+static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint);
+static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint,
+ int neg, int start, int end, const xmlChar *blockName);
+
+static xmlAutomataPtr xmlNewAutomata(void);
+static void xmlFreeAutomata(xmlAutomataPtr am);
+
+/**
+ * xmlStrEqual:
+ * @s1: first NUL-terminated string (may be NULL)
+ * @s2: second NUL-terminated string (may be NULL)
+ *
+ * Byte-wise equality test. Follows the libxml2 convention: identical
+ * pointers (including two NULLs) compare equal, and a NULL string never
+ * equals a non-NULL one. Passing NULL to strcmp is undefined, hence
+ * the explicit checks.
+ *
+ * Returns 1 if the strings are equal, 0 otherwise
+ */
+static int
+xmlStrEqual (const xmlChar* s1, const xmlChar* s2)
+{
+  if (s1 == s2)
+    return 1;
+
+  if (s1 == NULL || s2 == NULL)
+    return 0;
+
+  return strcmp ((const char*) s1, (const char*) s2) == 0;
+}
+
+/**
+ * xmlStrdup:
+ * @s: NUL-terminated string to copy (may be NULL)
+ *
+ * Duplicates @s into a buffer obtained from REGEXP_MALLOC. The copy is
+ * later released with REGEXP_FREE, so it must not come from strdup():
+ * that always uses malloc, which does not match the custom allocator
+ * selected by XSDE_CUSTOM_ALLOCATOR (and strdup is not ISO C).
+ *
+ * Returns the copy, or NULL if @s is NULL or the allocation fails
+ */
+static xmlChar*
+xmlStrdup (const xmlChar* s)
+{
+  size_t n;
+  xmlChar* r;
+
+  if (s == NULL)
+    return NULL;
+
+  n = strlen ((const char*) s) + 1; /* include the terminating NUL */
+  r = (xmlChar*) REGEXP_MALLOC (n);
+
+  if (r != NULL)
+    memcpy (r, s, n);
+
+  return r;
+}
+
+/**
+ * xmlStrndup:
+ * @s: source bytes (may be NULL)
+ * @len: number of bytes to copy
+ *
+ * Copies exactly @len bytes from @s into a buffer obtained from
+ * REGEXP_MALLOC and appends a terminating NUL.
+ *
+ * Returns the new string, or NULL if @s is NULL, @len is negative or
+ * the allocation fails
+ */
+static xmlChar*
+xmlStrndup (const xmlChar* s, int len)
+{
+  size_t n;
+  char* r;
+
+  /* A negative length would wrap to a huge unsigned size; -1 in
+   * particular yields a zero-byte allocation followed by an
+   * out-of-bounds copy. */
+  if (s == NULL || len < 0)
+    return NULL;
+
+  n = (size_t) len;
+  r = (char*) REGEXP_MALLOC (n + 1);
+
+  if (r != NULL)
+  {
+    memcpy (r, s, n);
+    r[n] = '\0';
+  }
+
+  return (xmlChar*) r;
+}
+
+/**
+ * xmlStringCurrentChar:
+ * @cur: pointer to the first byte of the character
+ * @len: receives the number of bytes the character occupies
+ *
+ * Decodes the UTF-8 sequence starting at @cur:
+ *
+ * UCS-4 range (hex.) UTF-8 octet sequence (binary)
+ * 0000 0000-0000 007F 0xxxxxxx
+ * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
+ * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
+ *
+ * Only the lead byte and the 10xxxxxx pattern of the continuation
+ * bytes are inspected; the decoded value itself is not range-checked.
+ *
+ * Returns the decoded value with *@len set to 1-4, or 0 with *@len set
+ * to 0 if the sequence is malformed or cut short
+ */
+
+static int
+xmlStringCurrentChar(const xmlChar * cur, int *len)
+{
+    const unsigned char lead = *cur;
+    unsigned int code;
+
+    /* 1-byte code */
+    if ((lead & 0x80) == 0) {
+        *len = 1;
+        return ((int) *cur);
+    }
+
+    /* Every multi-byte form needs at least one continuation byte. */
+    if ((cur[1] & 0xc0) != 0x80)
+        goto malformed;
+
+    /* 2-byte code (any lead byte that does not start with 111) */
+    if ((lead & 0xe0) != 0xe0) {
+        *len = 2;
+        code = (cur[0] & 0x1f) << 6;
+        code |= cur[1] & 0x3f;
+        return (code);
+    }
+
+    if ((cur[2] & 0xc0) != 0x80)
+        goto malformed;
+
+    /* 3-byte code */
+    if ((lead & 0xf0) != 0xf0) {
+        *len = 3;
+        code = (cur[0] & 0xf) << 12;
+        code |= (cur[1] & 0x3f) << 6;
+        code |= cur[2] & 0x3f;
+        return (code);
+    }
+
+    /* 4-byte code: lead must be 11110xxx and a third continuation follows */
+    if (((lead & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
+        goto malformed;
+
+    *len = 4;
+    code = (cur[0] & 0x7) << 18;
+    code |= (cur[1] & 0x3f) << 12;
+    code |= (cur[2] & 0x3f) << 6;
+    code |= cur[3] & 0x3f;
+    return (code);
+
+malformed:
+
+    /*
+     * An encoding problem may arise from a truncated input buffer
+     * splitting a character in the middle. In that case do not raise
+     * an error but return 0 to indicate an end of stream problem
+     */
+    *len = 0;
+    return(0);
+}
+
+/************************************************************************
+ * *
+ * Regexp memory error handler *
+ * *
+ ************************************************************************/
+/**
+ * xmlRegexpErrMemory:
+ * @ctxt: parser context to flag; nothing happens if it is NULL
+ * @extra: extra information, stored as the context's error string
+ *
+ * Record an out of memory condition in the parser context
+ */
+static void
+xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt, const char *extra)
+{
+    if (ctxt == NULL)
+        return;
+
+    ctxt->error_str = extra;
+    ctxt->error = XML_REGEXP_ERROR_MEMORY;
+}
+
+/**
+ * xmlRegexpErrCompile:
+ * @ctxt: parser context to flag; nothing happens if it is NULL
+ * @extra: extra information, stored as the context's error string
+ *
+ * Record a compilation failure in the parser context
+ */
+static void
+xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra)
+{
+    if (ctxt == NULL)
+        return;
+
+    ctxt->error_str = extra;
+    ctxt->error = XML_REGEXP_ERROR_COMPILE;
+}
+
+/************************************************************************
+ * *
+ * Allocation/Deallocation *
+ * *
+ ************************************************************************/
+
+static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
+/**
+ * xmlRegEpxFromParse:
+ * @ctxt: the parser context used to build it
+ *
+ * Allocate a new regexp and fill it with the result from the parser
+ *
+ * Returns the new regexp or NULL in case of error
+ */
+static xmlRegexpPtr
+xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
+ xmlRegexpPtr ret;
+
+ ret = (xmlRegexpPtr) REGEXP_MALLOC(sizeof(xmlRegexp));
+ if (ret == NULL) {
+ xmlRegexpErrMemory(ctxt, "compiling regexp");
+ return(NULL);
+ }
+ memset(ret, 0, sizeof(xmlRegexp));
+ ret->string = ctxt->string;
+ ret->nbStates = ctxt->nbStates;
+ ret->states = ctxt->states;
+ ret->nbAtoms = ctxt->nbAtoms;
+ ret->atoms = ctxt->atoms;
+ ret->nbCounters = ctxt->nbCounters;
+ ret->counters = ctxt->counters;
+ ret->determinist = ctxt->determinist;
+ ret->flags = ctxt->flags;
+ if (ret->determinist == -1) {
+ xmlRegexpIsDeterminist(ret);
+ }
+
+ if ((ret->determinist != 0) &&
+ (ret->nbCounters == 0) &&
+ (ctxt->negs == 0) &&
+ (ret->atoms != NULL) &&
+ (ret->atoms[0] != NULL) &&
+ (ret->atoms[0]->type == XML_REGEXP_STRING)) {
+ int i, j, nbstates = 0, nbatoms = 0;
+ int *stateRemap;
+ int *stringRemap;
+ int *transitions;
+ void **transdata;
+ xmlChar **stringMap;
+ xmlChar *value;
+
+ /*
+ * Switch to a compact representation
+ * 1/ counting the effective number of states left
+ * 2/ counting the unique number of atoms, and check that
+ * they are all of the string type
+ * 3/ build a table state x atom for the transitions
+ */
+
+ stateRemap = REGEXP_MALLOC(ret->nbStates * sizeof(int));
+ if (stateRemap == NULL) {
+ xmlRegexpErrMemory(ctxt, "compiling regexp");
+ REGEXP_FREE(ret);
+ return(NULL);
+ }
+ for (i = 0;i < ret->nbStates;i++) {
+ if (ret->states[i] != NULL) {
+ stateRemap[i] = nbstates;
+ nbstates++;
+ } else {
+ stateRemap[i] = -1;
+ }
+ }
+#ifdef DEBUG_COMPACTION
+ printf("Final: %d states\n", nbstates);
+#endif
+ stringMap = REGEXP_MALLOC(ret->nbAtoms * sizeof(char *));
+ if (stringMap == NULL) {
+ xmlRegexpErrMemory(ctxt, "compiling regexp");
+ REGEXP_FREE(stateRemap);
+ REGEXP_FREE(ret);
+ return(NULL);
+ }
+ stringRemap = REGEXP_MALLOC(ret->nbAtoms * sizeof(int));
+ if (stringRemap == NULL) {
+ xmlRegexpErrMemory(ctxt, "compiling regexp");
+ REGEXP_FREE(stringMap);
+ REGEXP_FREE(stateRemap);
+ REGEXP_FREE(ret);
+ return(NULL);
+ }
+ for (i = 0;i < ret->nbAtoms;i++) {
+ if ((ret->atoms[i]->type == XML_REGEXP_STRING) &&
+ (ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) {
+ value = ret->atoms[i]->valuep;
+ for (j = 0;j < nbatoms;j++) {
+ if (xmlStrEqual(stringMap[j], value)) {
+ stringRemap[i] = j;
+ break;
+ }
+ }
+ if (j >= nbatoms) {
+ stringRemap[i] = nbatoms;
+ stringMap[nbatoms] = xmlStrdup(value);
+ if (stringMap[nbatoms] == NULL) {
+ for (i = 0;i < nbatoms;i++)
+ REGEXP_FREE(stringMap[i]);
+ REGEXP_FREE(stringRemap);
+ REGEXP_FREE(stringMap);
+ REGEXP_FREE(stateRemap);
+ REGEXP_FREE(ret);
+ return(NULL);
+ }
+ nbatoms++;
+ }
+ } else {
+ REGEXP_FREE(stateRemap);
+ REGEXP_FREE(stringRemap);
+ for (i = 0;i < nbatoms;i++)
+ REGEXP_FREE(stringMap[i]);
+ REGEXP_FREE(stringMap);
+ REGEXP_FREE(ret);
+ return(NULL);
+ }
+ }
+#ifdef DEBUG_COMPACTION
+ printf("Final: %d atoms\n", nbatoms);
+#endif
+ transitions = (int *) REGEXP_MALLOC((nbstates + 1) *
+ (nbatoms + 1) * sizeof(int));
+ if (transitions == NULL) {
+ REGEXP_FREE(stateRemap);
+ REGEXP_FREE(stringRemap);
+ REGEXP_FREE(stringMap);
+ REGEXP_FREE(ret);
+ return(NULL);
+ }
+ memset(transitions, 0, (nbstates + 1) * (nbatoms + 1) * sizeof(int));
+
+ /*
+ * Allocate the transition table. The first entry for each
+ * state corresponds to the state type.
+ */
+ transdata = NULL;
+
+ for (i = 0;i < ret->nbStates;i++) {
+ int stateno, atomno, targetno, prev;
+ xmlRegStatePtr state;
+ xmlRegTransPtr trans;
+
+ stateno = stateRemap[i];
+ if (stateno == -1)
+ continue;
+ state = ret->states[i];
+
+ transitions[stateno * (nbatoms + 1)] = state->type;
+
+ for (j = 0;j < state->nbTrans;j++) {
+ trans = &(state->trans[j]);
+ if ((trans->to == -1) || (trans->atom == NULL))
+ continue;
+ atomno = stringRemap[trans->atom->no];
+ if ((trans->atom->data != NULL) && (transdata == NULL)) {
+ transdata = (void **) REGEXP_MALLOC(nbstates * nbatoms *
+ sizeof(void *));
+ if (transdata != NULL)
+ memset(transdata, 0,
+ nbstates * nbatoms * sizeof(void *));
+ else {
+ xmlRegexpErrMemory(ctxt, "compiling regexp");
+ break;
+ }
+ }
+ targetno = stateRemap[trans->to];
+ /*
+ * if the same atom can generate transitions to 2 different
+ * states then it means the automata is not determinist and
+ * the compact form can't be used !
+ */
+ prev = transitions[stateno * (nbatoms + 1) + atomno + 1];
+ if (prev != 0) {
+ if (prev != targetno + 1) {
+ ret->determinist = 0;
+#ifdef DEBUG_COMPACTION
+ printf("Indet: state %d trans %d, atom %d to %d : %d to %d\n",
+ i, j, trans->atom->no, trans->to, atomno, targetno);
+ printf(" previous to is %d\n", prev);
+#endif
+ if (transdata != NULL)
+ REGEXP_FREE(transdata);
+ REGEXP_FREE(transitions);
+ REGEXP_FREE(stateRemap);
+ REGEXP_FREE(stringRemap);
+ for (i = 0;i < nbatoms;i++)
+ REGEXP_FREE(stringMap[i]);
+ REGEXP_FREE(stringMap);
+ goto not_determ;
+ }
+ } else {
+#if 0
+ printf("State %d trans %d: atom %d to %d : %d to %d\n",
+ i, j, trans->atom->no, trans->to, atomno, targetno);
+#endif
+ transitions[stateno * (nbatoms + 1) + atomno + 1] =
+ targetno + 1; /* to avoid 0 */
+ if (transdata != NULL)
+ transdata[stateno * nbatoms + atomno] =
+ trans->atom->data;
+ }
+ }
+ }
+ ret->determinist = 1;
+#ifdef DEBUG_COMPACTION
+ /*
+ * Debug
+ */
+ for (i = 0;i < nbstates;i++) {
+ for (j = 0;j < nbatoms + 1;j++) {
+ printf("%02d ", transitions[i * (nbatoms + 1) + j]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+#endif
+ /*
+ * Cleanup of the old data
+ */
+ if (ret->states != NULL) {
+ for (i = 0;i < ret->nbStates;i++)
+ xmlRegFreeState(ret->states[i]);
+ REGEXP_FREE(ret->states);
+ }
+ ret->states = NULL;
+ ret->nbStates = 0;
+ if (ret->atoms != NULL) {
+ for (i = 0;i < ret->nbAtoms;i++)
+ xmlRegFreeAtom(ret->atoms[i]);
+ REGEXP_FREE(ret->atoms);
+ }
+ ret->atoms = NULL;
+ ret->nbAtoms = 0;
+
+ ret->compact = transitions;
+ ret->transdata = transdata;
+ ret->stringMap = stringMap;
+ ret->nbstrings = nbatoms;
+ ret->nbstates = nbstates;
+ REGEXP_FREE(stateRemap);
+ REGEXP_FREE(stringRemap);
+ }
+not_determ:
+ ctxt->string = NULL;
+ ctxt->nbStates = 0;
+ ctxt->states = NULL;
+ ctxt->nbAtoms = 0;
+ ctxt->atoms = NULL;
+ ctxt->nbCounters = 0;
+ ctxt->counters = NULL;
+ return(ret);
+}
+
+/**
+ * xmlRegNewParserCtxt:
+ * @string: the string to parse, may be NULL
+ *
+ * Allocate a new regexp parser context. The context is zero filled, gets
+ * its own copy of @string (when one is given) with the cursor placed at
+ * the beginning of that copy, and has its determinist field set to -1.
+ *
+ * Returns the new context or NULL in case of error
+ */
+static xmlRegParserCtxtPtr
+xmlRegNewParserCtxt(const xmlChar *string) {
+    xmlRegParserCtxtPtr ret;
+
+    ret = (xmlRegParserCtxtPtr) REGEXP_MALLOC(sizeof(xmlRegParserCtxt));
+    if (ret == NULL)
+        return(NULL);
+    memset(ret, 0, sizeof(xmlRegParserCtxt));
+    if (string != NULL) {
+        ret->string = xmlStrdup(string);
+        if (ret->string == NULL) {
+            /*
+             * The copy could not be made: report the failure instead of
+             * handing back a context which silently has nothing to parse.
+             */
+            REGEXP_FREE(ret);
+            return(NULL);
+        }
+    }
+    ret->cur = ret->string;
+    ret->neg = 0;
+    ret->negs = 0;
+    ret->error = 0;
+    ret->determinist = -1;
+    return(ret);
+}
+
+/**
+ * xmlRegNewRange:
+ * @ctxt: the regexp parser context
+ * @neg: is that negative
+ * @type: the type of range
+ * @start: the start codepoint
+ * @end: the end codepoint
+ *
+ * Allocate a new regexp range. The block name of the new range is
+ * initialized to NULL; callers which need one set it afterwards.
+ *
+ * Returns the new range or NULL in case of error
+ */
+static xmlRegRangePtr
+xmlRegNewRange(xmlRegParserCtxtPtr ctxt,
+               int neg, xmlRegAtomType type, int start, int end) {
+    xmlRegRangePtr ret;
+
+    ret = (xmlRegRangePtr) REGEXP_MALLOC(sizeof(xmlRegRange));
+    if (ret == NULL) {
+        xmlRegexpErrMemory(ctxt, "allocating range");
+        return(NULL);
+    }
+    ret->neg = neg;
+    ret->type = type;
+    ret->start = start;
+    ret->end = end;
+    /*
+     * The memory is not cleared by the allocation, and xmlRegFreeRange()
+     * releases blockName whenever it is not NULL: never leave it
+     * indeterminate.
+     */
+    ret->blockName = NULL;
+    return(ret);
+}
+
+/**
+ * xmlRegFreeRange:
+ * @range: the regexp range, may be NULL
+ *
+ * Release a regexp range together with its block name, if it has one.
+ * Nothing is done when @range is NULL.
+ */
+static void
+xmlRegFreeRange(xmlRegRangePtr range) {
+    if (range != NULL) {
+        if (range->blockName != NULL)
+            REGEXP_FREE(range->blockName);
+        REGEXP_FREE(range);
+    }
+}
+
+/**
+ * xmlRegCopyRange:
+ * @ctxt: the regexp parser context, used for error reporting
+ * @range: the regexp range
+ *
+ * Copy a regexp range, including a private copy of its block name
+ * when the original has one.
+ *
+ * Returns the new copy or NULL in case of error.
+ */
+static xmlRegRangePtr
+xmlRegCopyRange(xmlRegParserCtxtPtr ctxt, xmlRegRangePtr range) {
+    xmlRegRangePtr ret;
+
+    if (range == NULL)
+        return(NULL);
+
+    ret = xmlRegNewRange(ctxt, range->neg, range->type, range->start,
+                         range->end);
+    if (ret == NULL)
+        return(NULL);
+    /*
+     * Make sure the copy never carries an indeterminate name pointer:
+     * xmlRegFreeRange() releases blockName whenever it is not NULL.
+     */
+    ret->blockName = NULL;
+    if (range->blockName != NULL) {
+        ret->blockName = xmlStrdup(range->blockName);
+        if (ret->blockName == NULL) {
+            xmlRegexpErrMemory(ctxt, "allocating range");
+            xmlRegFreeRange(ret);
+            return(NULL);
+        }
+    }
+    return(ret);
+}
+
+/**
+ * xmlRegNewAtom:
+ * @ctxt: the regexp parser context, used for error reporting
+ * @type: the type of atom
+ *
+ * Allocate a zero filled atom of the given type. The quantifier is set
+ * to XML_REGEXP_QUANT_ONCE and both min and max are 0.
+ *
+ * Returns the new atom or NULL in case of error
+ */
+static xmlRegAtomPtr
+xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) {
+    xmlRegAtomPtr atom = (xmlRegAtomPtr) REGEXP_MALLOC(sizeof(xmlRegAtom));
+
+    if (atom != NULL) {
+        memset(atom, 0, sizeof(xmlRegAtom));
+        atom->type = type;
+        atom->quant = XML_REGEXP_QUANT_ONCE;
+        atom->min = 0;
+        atom->max = 0;
+    } else {
+        xmlRegexpErrMemory(ctxt, "allocating atom");
+    }
+    return(atom);
+}
+
+/**
+ * xmlRegFreeAtom:
+ * @atom: the regexp atom, may be NULL
+ *
+ * Release an atom: every range it holds, the range array itself, and
+ * the values owned by string atoms (valuep and valuep2) or block name
+ * atoms (valuep).
+ */
+static void
+xmlRegFreeAtom(xmlRegAtomPtr atom) {
+    int idx;
+
+    if (atom == NULL)
+        return;
+
+    idx = 0;
+    while (idx < atom->nbRanges) {
+        xmlRegFreeRange(atom->ranges[idx]);
+        idx++;
+    }
+    if (atom->ranges != NULL)
+        REGEXP_FREE(atom->ranges);
+
+    if (atom->type == XML_REGEXP_STRING) {
+        if (atom->valuep != NULL)
+            REGEXP_FREE(atom->valuep);
+        if (atom->valuep2 != NULL)
+            REGEXP_FREE(atom->valuep2);
+    } else if (atom->type == XML_REGEXP_BLOCK_NAME) {
+        if (atom->valuep != NULL)
+            REGEXP_FREE(atom->valuep);
+    }
+    REGEXP_FREE(atom);
+}
+
+/**
+ * xmlRegCopyAtom:
+ * @ctxt: the regexp parser context, used for error reporting
+ * @atom: the original atom
+ *
+ * Allocate a new atom carrying the type, quantifier, min, max and a
+ * copy of every range of @atom. Other fields of the new atom are left
+ * zeroed.
+ *
+ * Returns the new atom or NULL in case of error
+ */
+static xmlRegAtomPtr
+xmlRegCopyAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
+    xmlRegAtomPtr copy;
+    int idx;
+
+    copy = (xmlRegAtomPtr) REGEXP_MALLOC(sizeof(xmlRegAtom));
+    if (copy == NULL) {
+        xmlRegexpErrMemory(ctxt, "copying atom");
+        return(NULL);
+    }
+    memset(copy, 0, sizeof(xmlRegAtom));
+    copy->type = atom->type;
+    copy->quant = atom->quant;
+    copy->min = atom->min;
+    copy->max = atom->max;
+
+    /* no ranges to duplicate: the copy is complete */
+    if (atom->nbRanges <= 0)
+        return(copy);
+
+    copy->ranges = (xmlRegRangePtr *) REGEXP_MALLOC(sizeof(xmlRegRangePtr) *
+                                                    atom->nbRanges);
+    if (copy->ranges == NULL) {
+        xmlRegexpErrMemory(ctxt, "copying atom");
+        xmlRegFreeAtom(copy);
+        return(NULL);
+    }
+    for (idx = 0; idx < atom->nbRanges; idx++) {
+        copy->ranges[idx] = xmlRegCopyRange(ctxt, atom->ranges[idx]);
+        if (copy->ranges[idx] == NULL) {
+            /* nbRanges only counts the copies made so far */
+            xmlRegFreeAtom(copy);
+            return(NULL);
+        }
+        copy->nbRanges = idx + 1;
+    }
+    return(copy);
+}
+
+/**
+ * xmlRegNewState:
+ * @ctxt: the regexp parser context, used for error reporting
+ *
+ * Allocate a zero filled state of type XML_REGEXP_TRANS_STATE whose
+ * mark is XML_REGEXP_MARK_NORMAL.
+ *
+ * Returns the new state or NULL in case of error
+ */
+static xmlRegStatePtr
+xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
+    xmlRegStatePtr state = (xmlRegStatePtr) REGEXP_MALLOC(sizeof(xmlRegState));
+
+    if (state != NULL) {
+        memset(state, 0, sizeof(xmlRegState));
+        state->type = XML_REGEXP_TRANS_STATE;
+        state->mark = XML_REGEXP_MARK_NORMAL;
+    } else {
+        xmlRegexpErrMemory(ctxt, "allocating state");
+    }
+    return(state);
+}
+
+/**
+ * xmlRegFreeState:
+ * @state: the regexp state, may be NULL
+ *
+ * Release a regexp state along with its outgoing transition array and
+ * its transTo array.
+ */
+static void
+xmlRegFreeState(xmlRegStatePtr state) {
+    if (state != NULL) {
+        if (state->trans != NULL)
+            REGEXP_FREE(state->trans);
+        if (state->transTo != NULL)
+            REGEXP_FREE(state->transTo);
+        REGEXP_FREE(state);
+    }
+}
+
+/**
+ * xmlRegFreeParserCtxt:
+ * @ctxt: the regexp parser context, may be NULL
+ *
+ * Release a parser context and what it still holds: the string, every
+ * state, every atom and the counters array.
+ */
+static void
+xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
+    int idx;
+
+    if (ctxt == NULL)
+        return;
+
+    if (ctxt->string != NULL)
+        REGEXP_FREE(ctxt->string);
+
+    if (ctxt->states != NULL) {
+        idx = 0;
+        while (idx < ctxt->nbStates) {
+            xmlRegFreeState(ctxt->states[idx]);
+            idx++;
+        }
+        REGEXP_FREE(ctxt->states);
+    }
+
+    if (ctxt->atoms != NULL) {
+        idx = 0;
+        while (idx < ctxt->nbAtoms) {
+            xmlRegFreeAtom(ctxt->atoms[idx]);
+            idx++;
+        }
+        REGEXP_FREE(ctxt->atoms);
+    }
+
+    if (ctxt->counters != NULL)
+        REGEXP_FREE(ctxt->counters);
+
+    REGEXP_FREE(ctxt);
+}
+
+/************************************************************************
+ * *
+ * Finite Automata structures manipulations *
+ * *
+ ************************************************************************/
+
+/**
+ * xmlRegAtomAddRange:
+ * @ctxt: the regexp parser context
+ * @atom: the atom receiving the range, must be of type XML_REGEXP_RANGES
+ * @neg: is that negative
+ * @type: the type of range
+ * @start: the start codepoint
+ * @end: the end codepoint
+ * @blockName: the block name stored in the new range, may be NULL
+ *
+ * Append a new range to @atom. The range array starts with room for 4
+ * entries and doubles each time it is full. On any failure the atom is
+ * left without the new range.
+ */
+static void
+xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
+                   int neg, xmlRegAtomType type, int start, int end,
+                   xmlChar *blockName) {
+    xmlRegRangePtr added;
+
+    if (atom == NULL) {
+        REGEXP_ERROR("add range: atom is NULL");
+        return;
+    }
+    if (atom->type != XML_REGEXP_RANGES) {
+        REGEXP_ERROR("add range: atom is not ranges");
+        return;
+    }
+
+    if (atom->maxRanges == 0) {
+        /* first range of this atom: create the array */
+        atom->maxRanges = 4;
+        atom->ranges = (xmlRegRangePtr *) REGEXP_MALLOC(atom->maxRanges *
+                                                 sizeof(xmlRegRangePtr));
+        if (atom->ranges == NULL) {
+            xmlRegexpErrMemory(ctxt, "adding ranges");
+            atom->maxRanges = 0;
+            return;
+        }
+    } else if (atom->nbRanges >= atom->maxRanges) {
+        /* array is full: double it, rolling the size back on failure */
+        xmlRegRangePtr *grown;
+
+        atom->maxRanges *= 2;
+        grown = (xmlRegRangePtr *) REGEXP_REALLOC(atom->ranges,
+                              atom->maxRanges * sizeof(xmlRegRangePtr));
+        if (grown != NULL) {
+            atom->ranges = grown;
+        } else {
+            xmlRegexpErrMemory(ctxt, "adding ranges");
+            atom->maxRanges /= 2;
+            return;
+        }
+    }
+
+    added = xmlRegNewRange(ctxt, neg, type, start, end);
+    if (added != NULL) {
+        added->blockName = blockName;
+        atom->ranges[atom->nbRanges] = added;
+        atom->nbRanges++;
+    }
+}
+
+/**
+ * xmlRegGetCounter:
+ * @ctxt: the regexp parser context
+ *
+ * Reserve the next counter of the context, growing the counters array
+ * when needed (4 entries at first, doubled each time it is full). The
+ * new counter gets min and max set to -1.
+ *
+ * Returns the index of the new counter or -1 in case of error
+ */
+static int
+xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
+    if (ctxt->maxCounters == 0) {
+        /* first counter: create the array */
+        ctxt->maxCounters = 4;
+        ctxt->counters = (xmlRegCounter *) REGEXP_MALLOC(ctxt->maxCounters *
+                                                 sizeof(xmlRegCounter));
+        if (ctxt->counters == NULL) {
+            xmlRegexpErrMemory(ctxt, "allocating counter");
+            ctxt->maxCounters = 0;
+            return(-1);
+        }
+    } else if (ctxt->nbCounters >= ctxt->maxCounters) {
+        /* array is full: double it, rolling the size back on failure */
+        xmlRegCounter *grown;
+
+        ctxt->maxCounters *= 2;
+        grown = (xmlRegCounter *) REGEXP_REALLOC(ctxt->counters,
+                             ctxt->maxCounters * sizeof(xmlRegCounter));
+        if (grown != NULL) {
+            ctxt->counters = grown;
+        } else {
+            xmlRegexpErrMemory(ctxt, "allocating counter");
+            ctxt->maxCounters /= 2;
+            return(-1);
+        }
+    }
+
+    ctxt->counters[ctxt->nbCounters].max = -1;
+    ctxt->counters[ctxt->nbCounters].min = -1;
+    ctxt->nbCounters++;
+    return(ctxt->nbCounters - 1);
+}
+
+/**
+ * xmlRegAtomPush:
+ * @ctxt: the regexp parser context
+ * @atom: the atom to register
+ *
+ * Append @atom to the atoms array of the context, growing the array
+ * when needed (4 entries at first, doubled each time it is full), and
+ * record the atom's index in its no field.
+ *
+ * Returns 0 in case of success and -1 in case of error
+ */
+static int
+xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
+    if (atom == NULL) {
+        REGEXP_ERROR("atom push: atom is NULL");
+        return(-1);
+    }
+    if (ctxt->maxAtoms == 0) {
+        ctxt->maxAtoms = 4;
+        ctxt->atoms = (xmlRegAtomPtr *) REGEXP_MALLOC(ctxt->maxAtoms *
+                                                 sizeof(xmlRegAtomPtr));
+        if (ctxt->atoms == NULL) {
+            xmlRegexpErrMemory(ctxt, "pushing atom");
+            ctxt->maxAtoms = 0;
+            return(-1);
+        }
+    } else if (ctxt->nbAtoms >= ctxt->maxAtoms) {
+        xmlRegAtomPtr *tmp;
+        ctxt->maxAtoms *= 2;
+        tmp = (xmlRegAtomPtr *) REGEXP_REALLOC(ctxt->atoms, ctxt->maxAtoms *
+                                               sizeof(xmlRegAtomPtr));
+        if (tmp == NULL) {
+            /* report the operation which really failed, like the
+             * initial allocation above does */
+            xmlRegexpErrMemory(ctxt, "pushing atom");
+            ctxt->maxAtoms /= 2;
+            return(-1);
+        }
+        ctxt->atoms = tmp;
+    }
+    atom->no = ctxt->nbAtoms;
+    ctxt->atoms[ctxt->nbAtoms++] = atom;
+    return(0);
+}
+
+/**
+ * xmlRegStateAddTransTo:
+ * @ctxt: the regexp parser context, used for error reporting
+ * @target: the state being pointed to
+ * @from: the number of the state holding a transition to @target
+ *
+ * Record @from in the transTo array of @target, growing that array
+ * when needed (8 entries at first, doubled each time it is full).
+ * Nothing is recorded when the array cannot be allocated or grown.
+ */
+static void
+xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target,
+                      int from) {
+    if (target->maxTransTo == 0) {
+        /* first incoming transition: create the array */
+        target->maxTransTo = 8;
+        target->transTo = (int *) REGEXP_MALLOC(target->maxTransTo *
+                                                sizeof(int));
+        if (target->transTo == NULL) {
+            xmlRegexpErrMemory(ctxt, "adding transition");
+            target->maxTransTo = 0;
+            return;
+        }
+    } else if (target->nbTransTo >= target->maxTransTo) {
+        /* array is full: double it, rolling the size back on failure */
+        int *grown;
+
+        target->maxTransTo *= 2;
+        grown = (int *) REGEXP_REALLOC(target->transTo,
+                                       target->maxTransTo * sizeof(int));
+        if (grown != NULL) {
+            target->transTo = grown;
+        } else {
+            xmlRegexpErrMemory(ctxt, "adding transition");
+            target->maxTransTo /= 2;
+            return;
+        }
+    }
+    target->transTo[target->nbTransTo++] = from;
+}
+
+/**
+ * xmlRegStateAddTrans:
+ * @ctxt: the regexp parser context
+ * @state: the state the transition leaves from
+ * @atom: the atom of the transition, NULL for an atom-less one
+ * @target: the state the transition leads to
+ * @counter: the counter value stored in the transition
+ * @count: the count value stored in the transition
+ *
+ * Append a transition to @state unless an identical one (same atom,
+ * target, counter and count) is already present, then record @state as
+ * a source in the transTo array of @target. The transition array starts
+ * with room for 8 entries and doubles each time it is full.
+ */
+static void
+xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
+                    xmlRegAtomPtr atom, xmlRegStatePtr target,
+                    int counter, int count) {
+    int idx;
+    xmlRegTransPtr slot;
+
+    if (state == NULL) {
+        REGEXP_ERROR("add state: state is NULL");
+        return;
+    }
+    if (target == NULL) {
+        REGEXP_ERROR("add state: target is NULL");
+        return;
+    }
+
+    /*
+     * Callers add transitions liberally, so duplicates are filtered
+     * here: scan the existing transitions from the most recent one and
+     * silently drop the request when it is already satisfied.
+     */
+    idx = state->nbTrans;
+    while (--idx >= 0) {
+        xmlRegTransPtr known = &(state->trans[idx]);
+
+        if ((known->atom != atom) ||
+            (known->to != target->no) ||
+            (known->counter != counter) ||
+            (known->count != count))
+            continue;
+#ifdef DEBUG_REGEXP_GRAPH
+        printf("Ignoring duplicate transition from %d to %d\n",
+               state->no, target->no);
+#endif
+        return;
+    }
+
+    if (state->maxTrans == 0) {
+        /* first transition of this state: create the array */
+        state->maxTrans = 8;
+        state->trans = (xmlRegTrans *) REGEXP_MALLOC(state->maxTrans *
+                                                     sizeof(xmlRegTrans));
+        if (state->trans == NULL) {
+            xmlRegexpErrMemory(ctxt, "adding transition");
+            state->maxTrans = 0;
+            return;
+        }
+    } else if (state->nbTrans >= state->maxTrans) {
+        /* array is full: double it, rolling the size back on failure */
+        xmlRegTrans *grown;
+
+        state->maxTrans *= 2;
+        grown = (xmlRegTrans *) REGEXP_REALLOC(state->trans,
+                                 state->maxTrans * sizeof(xmlRegTrans));
+        if (grown != NULL) {
+            state->trans = grown;
+        } else {
+            xmlRegexpErrMemory(ctxt, "adding transition");
+            state->maxTrans /= 2;
+            return;
+        }
+    }
+#ifdef DEBUG_REGEXP_GRAPH
+    printf("Add trans from %d to %d ", state->no, target->no);
+    if (count == REGEXP_ALL_COUNTER)
+        printf("all transition\n");
+    else if (count >= 0)
+        printf("count based %d\n", count);
+    else if (counter >= 0)
+        printf("counted %d\n", counter);
+    else if (atom == NULL)
+        printf("epsilon transition\n");
+    else if (atom != NULL)
+        xmlRegPrintAtom(stdout, atom);
+#endif
+
+    slot = &(state->trans[state->nbTrans]);
+    slot->atom = atom;
+    slot->to = target->no;
+    slot->counter = counter;
+    slot->count = count;
+    slot->nd = 0;
+    state->nbTrans++;
+    xmlRegStateAddTransTo(ctxt, target, state->no);
+}
+
+/**
+ * xmlRegStatePush:
+ * @ctxt: the regexp parser context
+ * @state: the state to register, may be NULL
+ *
+ * Append @state to the states array of the context, growing the array
+ * when needed (4 entries at first, doubled each time it is full), and
+ * record the state's index in its no field.
+ *
+ * Returns 0 in case of success, -1 if @state is NULL or on error
+ */
+static int
+xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
+    if (state == NULL)
+        return(-1);
+
+    if (ctxt->maxStates == 0) {
+        /* first state: create the array */
+        ctxt->maxStates = 4;
+        ctxt->states = (xmlRegStatePtr *) REGEXP_MALLOC(ctxt->maxStates *
+                                                 sizeof(xmlRegStatePtr));
+        if (ctxt->states == NULL) {
+            xmlRegexpErrMemory(ctxt, "adding state");
+            ctxt->maxStates = 0;
+            return(-1);
+        }
+    } else if (ctxt->nbStates >= ctxt->maxStates) {
+        /* array is full: double it, rolling the size back on failure */
+        xmlRegStatePtr *grown;
+
+        ctxt->maxStates *= 2;
+        grown = (xmlRegStatePtr *) REGEXP_REALLOC(ctxt->states,
+                              ctxt->maxStates * sizeof(xmlRegStatePtr));
+        if (grown != NULL) {
+            ctxt->states = grown;
+        } else {
+            xmlRegexpErrMemory(ctxt, "adding state");
+            ctxt->maxStates /= 2;
+            return(-1);
+        }
+    }
+
+    state->no = ctxt->nbStates;
+    ctxt->states[ctxt->nbStates] = state;
+    ctxt->nbStates++;
+    return(0);
+}
+
+/**
+ * xmlFAGenerateEpsilonTransition:
+ * @ctxt: a regexp parser context
+ * @from: the from state
+ * @to: the target state or NULL for building a new one
+ *
+ * Add an atom-less transition, with counter and count both -1, going
+ * from @from to @to. When @to is NULL a new state is created, pushed on
+ * the context and made the current state before being used as target.
+ */
+static void
+xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,
+                               xmlRegStatePtr from, xmlRegStatePtr to) {
+    xmlRegStatePtr target = to;
+
+    if (target == NULL) {
+        target = xmlRegNewState(ctxt);
+        xmlRegStatePush(ctxt, target);
+        ctxt->state = target;
+    }
+    xmlRegStateAddTrans(ctxt, from, NULL, target, -1, -1);
+}
+
+/**
+ * xmlFAGenerateCountedEpsilonTransition:
+ * @ctxt: a regexp parser context
+ * @from: the from state
+ * @to: the target state or NULL for building a new one
+ * @counter: the counter for that transition
+ *
+ * Add an atom-less transition from @from to @to carrying @counter as
+ * its counter and -1 as its count. When @to is NULL a new state is
+ * created, pushed on the context and made the current state before
+ * being used as target.
+ */
+static void
+xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,
+        xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
+    xmlRegStatePtr target = to;
+
+    if (target == NULL) {
+        target = xmlRegNewState(ctxt);
+        xmlRegStatePush(ctxt, target);
+        ctxt->state = target;
+    }
+    xmlRegStateAddTrans(ctxt, from, NULL, target, counter, -1);
+}
+
+/**
+ * xmlFAGenerateCountedTransition:
+ * @ctxt: a regexp parser context
+ * @from: the from state
+ * @to: the target state or NULL for building a new one
+ * @counter: the counter for that transition
+ *
+ * Add an atom-less transition from @from to @to carrying -1 as its
+ * counter and @counter as its count. When @to is NULL a new state is
+ * created, pushed on the context and made the current state before
+ * being used as target.
+ */
+static void
+xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
+        xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
+    xmlRegStatePtr target = to;
+
+    if (target == NULL) {
+        target = xmlRegNewState(ctxt);
+        xmlRegStatePush(ctxt, target);
+        ctxt->state = target;
+    }
+    xmlRegStateAddTrans(ctxt, from, NULL, target, -1, counter);
+}
+
+/**
+ * xmlFAGenerateTransitions:
+ * @ctxt: a regexp parser context
+ * @from: the from state
+ * @to: the target state or NULL for building a new one
+ * @atom: the atom generating the transition
+ *
+ * Plug @atom into the automata between @from and @to, adding the extra
+ * transitions required by its quantifier. Sub-expression atoms
+ * (XML_REGEXP_SUBREG) are linked through their own start/stop states;
+ * a ranged sub-expression is handled with a counter. Any other atom
+ * gets a direct transition from @from. The atom quantifier is reset to
+ * XML_REGEXP_QUANT_ONCE once its transitions have been generated, except
+ * for a ranged non sub-expression atom which is left as is.
+ *
+ * Returns 0 if success and -1 in case of error.
+ */
+static int
+xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
+                         xmlRegStatePtr to, xmlRegAtomPtr atom) {
+    xmlRegStatePtr end;
+
+    if (atom == NULL) {
+        REGEXP_ERROR("genrate transition: atom == NULL");
+        return(-1);
+    }
+    if (atom->type == XML_REGEXP_SUBREG) {
+        /*
+         * this is a subexpression handling one should not need to
+         * create a new node except for XML_REGEXP_QUANT_RANGE.
+         */
+        if (xmlRegAtomPush(ctxt, atom) < 0) {
+            return(-1);
+        }
+        if ((to != NULL) && (atom->stop != to) &&
+            (atom->quant != XML_REGEXP_QUANT_RANGE)) {
+            /*
+             * Generate an epsilon transition to link to the target
+             */
+            xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
+#ifdef DV
+        } else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) &&
+                   (atom->quant != XML_REGEXP_QUANT_ONCE)) {
+            to = xmlRegNewState(ctxt);
+            xmlRegStatePush(ctxt, to);
+            ctxt->state = to;
+            xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
+#endif
+        }
+        switch (atom->quant) {
+            case XML_REGEXP_QUANT_OPT:
+                atom->quant = XML_REGEXP_QUANT_ONCE;
+                /*
+                 * transition done to the state after end of atom.
+                 *      1. set transition from atom start to new state
+                 *      2. set transition from atom end to this state.
+                 */
+                if (to == NULL) {
+                    xmlFAGenerateEpsilonTransition(ctxt, atom->start, NULL);
+                    xmlFAGenerateEpsilonTransition(ctxt, atom->stop,
+                                                   ctxt->state);
+                } else {
+                    xmlFAGenerateEpsilonTransition(ctxt, atom->start, to);
+                }
+                break;
+            case XML_REGEXP_QUANT_MULT:
+                atom->quant = XML_REGEXP_QUANT_ONCE;
+                xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
+                xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
+                break;
+            case XML_REGEXP_QUANT_PLUS:
+                atom->quant = XML_REGEXP_QUANT_ONCE;
+                xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
+                break;
+            case XML_REGEXP_QUANT_RANGE: {
+                int counter;
+                xmlRegStatePtr inter, newstate;
+
+                /*
+                 * create the final state now if needed
+                 */
+                if (to != NULL) {
+                    newstate = to;
+                } else {
+                    newstate = xmlRegNewState(ctxt);
+                    /*
+                     * Without a final state the counted transitions below
+                     * would each build a state of their own: give up.
+                     */
+                    if (newstate == NULL)
+                        return(-1);
+                    xmlRegStatePush(ctxt, newstate);
+                }
+
+                /*
+                 * The principle here is to use counted transition
+                 * to avoid explosion in the number of states in the
+                 * graph. This is clearly more complex but should not
+                 * be exploitable at runtime.
+                 */
+                if ((atom->min == 0) && (atom->start0 == NULL)) {
+                    xmlRegAtomPtr copy;
+                    /*
+                     * duplicate a transition based on atom to count next
+                     * occurrences after 1. We cannot loop to atom->start
+                     * directly because we need an epsilon transition to
+                     * newstate.
+                     */
+                     /* ???? For some reason it seems we never reach that
+                        case, I suppose this got optimized out before when
+                        building the automata */
+                    copy = xmlRegCopyAtom(ctxt, atom);
+                    if (copy == NULL)
+                        return(-1);
+                    copy->quant = XML_REGEXP_QUANT_ONCE;
+                    copy->min = 0;
+                    copy->max = 0;
+
+                    if (xmlFAGenerateTransitions(ctxt, atom->start, NULL, copy)
+                        < 0)
+                        return(-1);
+                    inter = ctxt->state;
+                    counter = xmlRegGetCounter(ctxt);
+                    /* -1 means no counter: do not index the array with it */
+                    if (counter < 0)
+                        return(-1);
+                    ctxt->counters[counter].min = atom->min - 1;
+                    ctxt->counters[counter].max = atom->max - 1;
+                    /* count the number of times we see it again */
+                    xmlFAGenerateCountedEpsilonTransition(ctxt, inter,
+                                                   atom->stop, counter);
+                    /* allow a way out based on the count */
+                    xmlFAGenerateCountedTransition(ctxt, inter,
+                                                   newstate, counter);
+                    /* and also allow a direct exit for 0 */
+                    xmlFAGenerateEpsilonTransition(ctxt, atom->start,
+                                                   newstate);
+                } else {
+                    /*
+                     * either we need the atom at least once or there
+                     * is an atom->start0 allowing to easily plug the
+                     * epsilon transition.
+                     */
+                    counter = xmlRegGetCounter(ctxt);
+                    /* -1 means no counter: do not index the array with it */
+                    if (counter < 0)
+                        return(-1);
+                    ctxt->counters[counter].min = atom->min - 1;
+                    ctxt->counters[counter].max = atom->max - 1;
+                    /* count the number of times we see it again */
+                    xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
+                                                   atom->start, counter);
+                    /* allow a way out based on the count */
+                    xmlFAGenerateCountedTransition(ctxt, atom->stop,
+                                                   newstate, counter);
+                    /* and if needed allow a direct exit for 0 */
+                    if (atom->min == 0)
+                        xmlFAGenerateEpsilonTransition(ctxt, atom->start0,
+                                                       newstate);
+
+                }
+                atom->min = 0;
+                atom->max = 0;
+                atom->quant = XML_REGEXP_QUANT_ONCE;
+                ctxt->state = newstate;
+                break;
+            }
+            default:
+                break;
+        }
+        return(0);
+    }
+    if ((atom->min == 0) && (atom->max == 0) &&
+        (atom->quant == XML_REGEXP_QUANT_RANGE)) {
+        /*
+         * we can discard the atom and generate an epsilon transition instead
+         */
+        if (to == NULL) {
+            to = xmlRegNewState(ctxt);
+            if (to != NULL)
+                xmlRegStatePush(ctxt, to);
+            else {
+                return(-1);
+            }
+        }
+        xmlFAGenerateEpsilonTransition(ctxt, from, to);
+        ctxt->state = to;
+        xmlRegFreeAtom(atom);
+        return(0);
+    }
+    if (to == NULL) {
+        to = xmlRegNewState(ctxt);
+        if (to != NULL)
+            xmlRegStatePush(ctxt, to);
+        else {
+            return(-1);
+        }
+    }
+    end = to;
+    if ((atom->quant == XML_REGEXP_QUANT_MULT) ||
+        (atom->quant == XML_REGEXP_QUANT_PLUS)) {
+        /*
+         * Do not pollute the target state by adding transitions from
+         * it as it is likely to be the shared target of multiple branches.
+         * So isolate with an epsilon transition.
+         */
+        xmlRegStatePtr tmp;
+
+        tmp = xmlRegNewState(ctxt);
+        if (tmp != NULL)
+            xmlRegStatePush(ctxt, tmp);
+        else {
+            return(-1);
+        }
+        xmlFAGenerateEpsilonTransition(ctxt, tmp, to);
+        to = tmp;
+    }
+    if (xmlRegAtomPush(ctxt, atom) < 0) {
+        return(-1);
+    }
+    xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);
+    /* the current state is the real target, not the isolation state */
+    ctxt->state = end;
+    switch (atom->quant) {
+        case XML_REGEXP_QUANT_OPT:
+            atom->quant = XML_REGEXP_QUANT_ONCE;
+            xmlFAGenerateEpsilonTransition(ctxt, from, to);
+            break;
+        case XML_REGEXP_QUANT_MULT:
+            atom->quant = XML_REGEXP_QUANT_ONCE;
+            xmlFAGenerateEpsilonTransition(ctxt, from, to);
+            xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
+            break;
+        case XML_REGEXP_QUANT_PLUS:
+            atom->quant = XML_REGEXP_QUANT_ONCE;
+            xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
+            break;
+        case XML_REGEXP_QUANT_RANGE:
+#if DV_test
+            if (atom->min == 0) {
+                xmlFAGenerateEpsilonTransition(ctxt, from, to);
+            }
+#endif
+            break;
+        default:
+            break;
+    }
+    return(0);
+}
+
+/**
+ * xmlFAReduceEpsilonTransitions:
+ * @ctxt: a regexp parser context
+ * @fromnr: the number of the state receiving the new transitions
+ * @tonr: the number of the state whose transitions are copied
+ * @counter: the counter value given to copied atom transitions which
+ * do not carry a counter of their own
+ *
+ * Copy onto state @fromnr the transitions leaving state @tonr, so that
+ * the atom-less transition between the two can later be dropped:
+ * - if state @tonr is final, state @fromnr becomes final too;
+ * - a transition with an atom is added to @fromnr with the same atom
+ * and target, keeping its own counter when it has one (>= 0) and
+ * using @counter otherwise;
+ * - an atom-less transition with a count >= 0 is added to @fromnr with
+ * that count;
+ * - any other atom-less transition is followed recursively, unless it
+ * leads back to @fromnr.
+ * States marked XML_REGEXP_MARK_START or XML_REGEXP_MARK_VISITED are not
+ * entered; the mark of state @tonr is set to VISITED for the duration
+ * of the call and reset to NORMAL before returning.
+ */
+static void
+xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
+ int tonr, int counter) {
+ int transnr;
+ xmlRegStatePtr from;
+ xmlRegStatePtr to;
+
+#ifdef DEBUG_REGEXP_GRAPH
+ printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr);
+#endif
+ from = ctxt->states[fromnr];
+ if (from == NULL)
+ return;
+ to = ctxt->states[tonr];
+ if (to == NULL)
+ return;
+ if ((to->mark == XML_REGEXP_MARK_START) ||
+ (to->mark == XML_REGEXP_MARK_VISITED))
+ return; /* already being processed: stop here to avoid looping */
+
+ to->mark = XML_REGEXP_MARK_VISITED;
+ if (to->type == XML_REGEXP_FINAL_STATE) {
+#ifdef DEBUG_REGEXP_GRAPH
+ printf("State %d is final, so %d becomes final\n", tonr, fromnr);
+#endif
+ from->type = XML_REGEXP_FINAL_STATE;
+ }
+ for (transnr = 0;transnr < to->nbTrans;transnr++) {
+ if (to->trans[transnr].to < 0)
+ continue; /* negative targets are skipped */
+ if (to->trans[transnr].atom == NULL) {
+ /*
+ * Don't remove counted transitions
+ * Don't loop either
+ */
+ if (to->trans[transnr].to != fromnr) {
+ if (to->trans[transnr].count >= 0) {
+ int newto = to->trans[transnr].to;
+
+ xmlRegStateAddTrans(ctxt, from, NULL,
+ ctxt->states[newto],
+ -1, to->trans[transnr].count);
+ } else {
+#ifdef DEBUG_REGEXP_GRAPH
+ printf("Found epsilon trans %d from %d to %d\n",
+ transnr, tonr, to->trans[transnr].to);
+#endif
+ if (to->trans[transnr].counter >= 0) {
+ xmlFAReduceEpsilonTransitions(ctxt, fromnr,
+ to->trans[transnr].to,
+ to->trans[transnr].counter);
+ } else {
+ xmlFAReduceEpsilonTransitions(ctxt, fromnr,
+ to->trans[transnr].to,
+ counter);
+ }
+ }
+ }
+ } else {
+ int newto = to->trans[transnr].to;
+
+ if (to->trans[transnr].counter >= 0) {
+ xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
+ ctxt->states[newto],
+ to->trans[transnr].counter, -1);
+ } else {
+ xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
+ ctxt->states[newto], counter, -1);
+ }
+ }
+ }
+ to->mark = XML_REGEXP_MARK_NORMAL; /* allow later walks through this state */
+}
+
+/**
+ * xmlFAEliminateSimpleEpsilonTransitions:
+ * @ctxt: a regexp parser context
+ *
+ * Eliminating general epsilon transitions can get costly in the general
+ * algorithm due to the large amount of generated new transitions and
+ * associated comparisons. However for simple epsilon transition used just
+ * to separate building blocks when generating the automata this can be
+ * reduced to state elimination:
+ * - if there exists an epsilon from X to Y
+ * - if there is no other transition from X
+ * then X and Y are semantically equivalent and X can be eliminated
+ * If X is the start state then make Y the start state, else replace the
+ * target of all transitions to X by transitions to Y.
+ *
+ * NOTE: in the code below a start state X is in fact left untouched
+ * (only a debug trace is emitted for it). For any other X, each
+ * transition leading to X is disabled (target set to -1) and re-added
+ * towards Y, Y becomes final if X was final, and X is emptied and typed
+ * XML_REGEXP_UNREACH_STATE; the state itself is not freed here.
+ */
+static void
+xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
+ int statenr, i, j, newto;
+ xmlRegStatePtr state, tmp;
+
+ for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
+ state = ctxt->states[statenr];
+ if (state == NULL)
+ continue;
+ if (state->nbTrans != 1)
+ continue;
+ if (state->type == XML_REGEXP_UNREACH_STATE)
+ continue;
+ /* is the only transition out a basic transition */
+ if ((state->trans[0].atom == NULL) &&
+ (state->trans[0].to >= 0) &&
+ (state->trans[0].to != statenr) &&
+ (state->trans[0].counter < 0) &&
+ (state->trans[0].count < 0)) {
+ newto = state->trans[0].to;
+
+ if (state->type == XML_REGEXP_START_STATE) {
+#ifdef DEBUG_REGEXP_GRAPH
+ printf("Found simple epsilon trans from start %d to %d\n",
+ statenr, newto);
+#endif
+ } else {
+#ifdef DEBUG_REGEXP_GRAPH
+ printf("Found simple epsilon trans from %d to %d\n",
+ statenr, newto);
+#endif
+ for (i = 0;i < state->nbTransTo;i++) { /* every recorded source of this state */
+ tmp = ctxt->states[state->transTo[i]];
+ for (j = 0;j < tmp->nbTrans;j++) {
+ if (tmp->trans[j].to == statenr) {
+#ifdef DEBUG_REGEXP_GRAPH
+ printf("Changed transition %d on %d to go to %d\n",
+ j, tmp->no, newto);
+#endif
+ tmp->trans[j].to = -1; /* disable the old transition ... */
+ xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom,
+ ctxt->states[newto],
+ tmp->trans[j].counter,
+ tmp->trans[j].count); /* ... and add its replacement towards newto */
+ }
+ }
+ }
+ if (state->type == XML_REGEXP_FINAL_STATE)
+ ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE;
+ /* eliminate the transition completely */
+ state->nbTrans = 0;
+
+ state->type = XML_REGEXP_UNREACH_STATE;
+
+ }
+
+ }
+ }
+}
+/**
+ * xmlFAEliminateEpsilonTransitions:
+ * @ctxt: a regexp parser context
+ *
+ * Remove the atom-less, non count based transitions from the automata
+ * of @ctxt. Does nothing when the context has no states. The work is
+ * done in successive passes:
+ * 1. run xmlFAEliminateSimpleEpsilonTransitions() and free the states
+ * it typed XML_REGEXP_UNREACH_STATE;
+ * 2. walking the states from last to first, type as
+ * XML_REGEXP_SINK_STATE the non-final states without transitions,
+ * disable atom-less transitions looping on their own state, and for
+ * every other atom-less transition with a negative count set its
+ * target to -2 and pull in the transitions reachable through it
+ * with xmlFAReduceEpsilonTransitions();
+ * 3. if such a transition was found, disable (target -1) every
+ * atom-less transition with a negative count still enabled;
+ * 4. starting from state 0, mark the states reachable through
+ * transitions having an atom or a count >= 0, then free every state
+ * which was not reached.
+ * Freed states leave a NULL entry in ctxt->states.
+ */
+static void
+xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
+ int statenr, transnr;
+ xmlRegStatePtr state;
+ int has_epsilon;
+
+ if (ctxt->states == NULL) return;
+
+ /*
+ * Eliminate simple epsilon transition and the associated unreachable
+ * states.
+ */
+ xmlFAEliminateSimpleEpsilonTransitions(ctxt);
+ for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
+ state = ctxt->states[statenr];
+ if ((state != NULL) && (state->type == XML_REGEXP_UNREACH_STATE)) {
+#ifdef DEBUG_REGEXP_GRAPH
+ printf("Removed unreachable state %d\n", statenr);
+#endif
+ xmlRegFreeState(state);
+ ctxt->states[statenr] = NULL;
+ }
+ }
+
+ has_epsilon = 0;
+
+ /*
+ * Build the completed transitions bypassing the epsilons
+ * Use a marking algorithm to avoid loops
+ * Mark sink states too.
+ * Process from the latest states backward to the start: when
+ * there are long cascading epsilon chains this minimizes the
+ * recursions and transition compares when adding the new ones
+ */
+ for (statenr = ctxt->nbStates - 1;statenr >= 0;statenr--) {
+ state = ctxt->states[statenr];
+ if (state == NULL)
+ continue;
+ if ((state->nbTrans == 0) &&
+ (state->type != XML_REGEXP_FINAL_STATE)) {
+ state->type = XML_REGEXP_SINK_STATE;
+ }
+ for (transnr = 0;transnr < state->nbTrans;transnr++) {
+ if ((state->trans[transnr].atom == NULL) &&
+ (state->trans[transnr].to >= 0)) {
+ if (state->trans[transnr].to == statenr) {
+ state->trans[transnr].to = -1;
+#ifdef DEBUG_REGEXP_GRAPH
+ printf("Removed loopback epsilon trans %d on %d\n",
+ transnr, statenr);
+#endif
+ } else if (state->trans[transnr].count < 0) {
+ int newto = state->trans[transnr].to;
+
+#ifdef DEBUG_REGEXP_GRAPH
+ printf("Found epsilon trans %d from %d to %d\n",
+ transnr, statenr, newto);
+#endif
+ has_epsilon = 1;
+ state->trans[transnr].to = -2; /* target saved in newto; negative targets are skipped by the passes below */
+ state->mark = XML_REGEXP_MARK_START; /* the reduction refuses to enter START marked states */
+ xmlFAReduceEpsilonTransitions(ctxt, statenr,
+ newto, state->trans[transnr].counter);
+ state->mark = XML_REGEXP_MARK_NORMAL;
+#ifdef DEBUG_REGEXP_GRAPH
+ } else {
+ printf("Found counted transition %d on %d\n",
+ transnr, statenr);
+#endif
+ }
+ }
+ }
+ }
+ /*
+ * Eliminate the epsilon transitions
+ */
+ if (has_epsilon) {
+ for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
+ state = ctxt->states[statenr];
+ if (state == NULL)
+ continue;
+ for (transnr = 0;transnr < state->nbTrans;transnr++) {
+ xmlRegTransPtr trans = &(state->trans[transnr]);
+ if ((trans->atom == NULL) &&
+ (trans->count < 0) &&
+ (trans->to >= 0)) {
+ trans->to = -1;
+ }
+ }
+ }
+ }
+
+ /*
+ * Use this pass to detect unreachable states too
+ */
+ for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
+ state = ctxt->states[statenr];
+ if (state != NULL)
+ state->reached = XML_REGEXP_MARK_NORMAL;
+ }
+ state = ctxt->states[0]; /* the walk starts from state 0 */
+ if (state != NULL)
+ state->reached = XML_REGEXP_MARK_START;
+ while (state != NULL) {
+ xmlRegStatePtr target = NULL;
+ state->reached = XML_REGEXP_MARK_VISITED;
+ /*
+ * Mark all states reachable from the current reachable state
+ */
+ for (transnr = 0;transnr < state->nbTrans;transnr++) {
+ if ((state->trans[transnr].to >= 0) &&
+ ((state->trans[transnr].atom != NULL) ||
+ (state->trans[transnr].count >= 0))) {
+ int newto = state->trans[transnr].to;
+
+ if (ctxt->states[newto] == NULL)
+ continue;
+ if (ctxt->states[newto]->reached == XML_REGEXP_MARK_NORMAL) {
+ ctxt->states[newto]->reached = XML_REGEXP_MARK_START;
+ target = ctxt->states[newto];
+ }
+ }
+ }
+
+ /*
+ * find the next accessible state not explored
+ */
+ if (target == NULL) {
+ for (statenr = 1;statenr < ctxt->nbStates;statenr++) {
+ state = ctxt->states[statenr];
+ if ((state != NULL) && (state->reached ==
+ XML_REGEXP_MARK_START)) {
+ target = state;
+ break;
+ }
+ }
+ }
+ state = target; /* NULL once no START marked state is left, which ends the walk */
+ }
+ for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
+ state = ctxt->states[statenr];
+ if ((state != NULL) && (state->reached == XML_REGEXP_MARK_NORMAL)) {
+#ifdef DEBUG_REGEXP_GRAPH
+ printf("Removed unreachable state %d\n", statenr);
+#endif
+ xmlRegFreeState(state);
+ ctxt->states[statenr] = NULL;
+ }
+ }
+
+}
+/**
+ * xmlFACompareRanges:
+ * @range1: a range of a character class
+ * @range2: a range of a character class
+ *
+ * Compares two ranges to check whether they may accept a common
+ * character; called by xmlFACompareAtoms for XML_REGEXP_RANGES atoms.
+ *
+ * Returns 1 if the ranges may intersect, 0 if not, and -1 when the
+ * comparison is not possible (a range of type RANGES, SUBREG or STRING,
+ * or an error reported by xmlRegCheckCharacterRange)
+ */
+static int
+xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) {
+ int ret = 0;
+
+ if ((range1->type == XML_REGEXP_RANGES) ||
+ (range2->type == XML_REGEXP_RANGES) ||
+ (range2->type == XML_REGEXP_SUBREG) ||
+ (range1->type == XML_REGEXP_SUBREG) ||
+ (range1->type == XML_REGEXP_STRING) ||
+ (range2->type == XML_REGEXP_STRING))
+ return(-1);
+
+ /* put them in order */
+ if (range1->type > range2->type) {
+ xmlRegRangePtr tmp;
+
+ tmp = range1;
+ range1 = range2;
+ range2 = tmp;
+ }
+ if ((range1->type == XML_REGEXP_ANYCHAR) ||
+ (range2->type == XML_REGEXP_ANYCHAR)) {
+ ret = 1;
+ } else if ((range1->type == XML_REGEXP_EPSILON) ||
+ (range2->type == XML_REGEXP_EPSILON)) {
+ return(0);
+ } else if (range1->type == range2->type) {
+ if (range1->type != XML_REGEXP_CHARVAL)
+ ret = 1;
+ else if ((range1->end < range2->start) ||
+ (range2->end < range1->start))
+ ret = 0; /* the two codepoint intervals do not overlap */
+ else
+ ret = 1;
+ } else if (range1->type == XML_REGEXP_CHARVAL) {
+ int codepoint;
+ int neg = 0;
+
+ /*
+ * just check all codepoints in the range for acceptance,
+ * this is usually way cheaper since done only once at
+ * compilation than testing over and over at runtime or
+ * pushing too many states when evaluating.
+ */
+ if (((range1->neg == 0) && (range2->neg != 0)) ||
+ ((range1->neg != 0) && (range2->neg == 0)))
+ neg = 1;
+
+ for (codepoint = range1->start;codepoint <= range1->end ;codepoint++) {
+ ret = xmlRegCheckCharacterRange(range2->type, codepoint,
+ 0, range2->start, range2->end,
+ range2->blockName);
+ if (ret < 0)
+ return(-1);
+ if (((neg == 1) && (ret == 0)) ||
+ ((neg == 0) && (ret == 1)))
+ return(1);
+ }
+ return(0); /* negation was applied in the loop, skip the final flip */
+ } else if ((range1->type == XML_REGEXP_BLOCK_NAME) ||
+ (range2->type == XML_REGEXP_BLOCK_NAME)) {
+ if (range1->type == range2->type) {
+ ret = xmlStrEqual(range1->blockName, range2->blockName);
+ } else {
+ /*
+ * comparing a block range with anything else is way
+ * too costly, and maintaining the table is like too much
+ * memory too, so let's force the automata to save state
+ * here.
+ */
+ return(1);
+ }
+ } else if ((range1->type < XML_REGEXP_LETTER) ||
+ (range2->type < XML_REGEXP_LETTER)) {
+ if ((range1->type == XML_REGEXP_ANYSPACE) &&
+ (range2->type == XML_REGEXP_NOTSPACE))
+ ret = 0;
+ else if ((range1->type == XML_REGEXP_INITNAME) &&
+ (range2->type == XML_REGEXP_NOTINITNAME))
+ ret = 0;
+ else if ((range1->type == XML_REGEXP_NAMECHAR) &&
+ (range2->type == XML_REGEXP_NOTNAMECHAR))
+ ret = 0;
+ else if ((range1->type == XML_REGEXP_DECIMAL) &&
+ (range2->type == XML_REGEXP_NOTDECIMAL))
+ ret = 0;
+ else if ((range1->type == XML_REGEXP_REALCHAR) &&
+ (range2->type == XML_REGEXP_NOTREALCHAR))
+ ret = 0;
+ else {
+ /* same thing to limit complexity */
+ return(1);
+ }
+ } else {
+ ret = 0;
+ /* range1->type < range2->type here */
+ switch (range1->type) {
+ case XML_REGEXP_LETTER:
+ /* all disjoint except in the subgroups */
+ if ((range2->type == XML_REGEXP_LETTER_UPPERCASE) ||
+ (range2->type == XML_REGEXP_LETTER_LOWERCASE) ||
+ (range2->type == XML_REGEXP_LETTER_TITLECASE) ||
+ (range2->type == XML_REGEXP_LETTER_MODIFIER) ||
+ (range2->type == XML_REGEXP_LETTER_OTHERS))
+ ret = 1;
+ break;
+ case XML_REGEXP_MARK:
+ if ((range2->type == XML_REGEXP_MARK_NONSPACING) ||
+ (range2->type == XML_REGEXP_MARK_SPACECOMBINING) ||
+ (range2->type == XML_REGEXP_MARK_ENCLOSING))
+ ret = 1;
+ break;
+ case XML_REGEXP_NUMBER:
+ if ((range2->type == XML_REGEXP_NUMBER_DECIMAL) ||
+ (range2->type == XML_REGEXP_NUMBER_LETTER) ||
+ (range2->type == XML_REGEXP_NUMBER_OTHERS))
+ ret = 1;
+ break;
+ case XML_REGEXP_PUNCT:
+ if ((range2->type == XML_REGEXP_PUNCT_CONNECTOR) ||
+ (range2->type == XML_REGEXP_PUNCT_DASH) ||
+ (range2->type == XML_REGEXP_PUNCT_OPEN) ||
+ (range2->type == XML_REGEXP_PUNCT_CLOSE) ||
+ (range2->type == XML_REGEXP_PUNCT_INITQUOTE) ||
+ (range2->type == XML_REGEXP_PUNCT_FINQUOTE) ||
+ (range2->type == XML_REGEXP_PUNCT_OTHERS))
+ ret = 1;
+ break;
+ case XML_REGEXP_SEPAR:
+ if ((range2->type == XML_REGEXP_SEPAR_SPACE) ||
+ (range2->type == XML_REGEXP_SEPAR_LINE) ||
+ (range2->type == XML_REGEXP_SEPAR_PARA))
+ ret = 1;
+ break;
+ case XML_REGEXP_SYMBOL:
+ if ((range2->type == XML_REGEXP_SYMBOL_MATH) ||
+ (range2->type == XML_REGEXP_SYMBOL_CURRENCY) ||
+ (range2->type == XML_REGEXP_SYMBOL_MODIFIER) ||
+ (range2->type == XML_REGEXP_SYMBOL_OTHERS))
+ ret = 1;
+ break;
+ case XML_REGEXP_OTHER:
+ if ((range2->type == XML_REGEXP_OTHER_CONTROL) ||
+ (range2->type == XML_REGEXP_OTHER_FORMAT) ||
+ (range2->type == XML_REGEXP_OTHER_PRIVATE))
+ ret = 1;
+ break;
+ default:
+ if ((range2->type >= XML_REGEXP_LETTER) &&
+ (range2->type < XML_REGEXP_BLOCK_NAME))
+ ret = 0;
+ else {
+ /* safety net ! */
+ return(1);
+ }
+ }
+ }
+ if (((range1->neg == 0) && (range2->neg != 0)) ||
+ ((range1->neg != 0) && (range2->neg == 0)))
+ ret = !ret; /* exactly one of the two ranges is negated: invert the verdict */
+ return(ret);
+}
+
+/**
+ * xmlFACompareAtomTypes:
+ * @type1: an atom type
+ * @type2: an atom type
+ *
+ * Compares two atom types to check whether they may intersect in some
+ * way; this is used by xmlFACompareAtoms only.
+ *
+ * The answer is conservative: 1 is returned whenever one of the cases
+ * below does not establish that the two types are disjoint.
+ *
+ * Returns 1 if they may intersect and 0 otherwise
+ */
+static int
+xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) {
+    if ((type1 == XML_REGEXP_EPSILON) ||
+        (type1 == XML_REGEXP_CHARVAL) ||
+        (type1 == XML_REGEXP_RANGES) ||
+        (type1 == XML_REGEXP_SUBREG) ||
+        (type1 == XML_REGEXP_STRING) ||
+        (type1 == XML_REGEXP_ANYCHAR))
+        return(1);
+    if ((type2 == XML_REGEXP_EPSILON) ||
+        (type2 == XML_REGEXP_CHARVAL) ||
+        (type2 == XML_REGEXP_RANGES) ||
+        (type2 == XML_REGEXP_SUBREG) ||
+        (type2 == XML_REGEXP_STRING) ||
+        (type2 == XML_REGEXP_ANYCHAR))
+        return(1);
+
+    if (type1 == type2) return(1);
+
+    /* simplify subsequent compares by making sure type1 < type2 */
+    if (type1 > type2) {
+        xmlRegAtomType tmp = type1;
+        type1 = type2;
+        type2 = tmp;
+    }
+    switch (type1) {
+        case XML_REGEXP_ANYSPACE: /* \s */
+            /* can't be a letter, number, mark, punctuation, symbol */
+            if ((type2 == XML_REGEXP_NOTSPACE) ||
+                ((type2 >= XML_REGEXP_LETTER) &&
+                 (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
+                ((type2 >= XML_REGEXP_NUMBER) &&
+                 (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
+                ((type2 >= XML_REGEXP_MARK) &&
+                 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
+                ((type2 >= XML_REGEXP_PUNCT) &&
+                 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
+                ((type2 >= XML_REGEXP_SYMBOL) &&
+                 (type2 <= XML_REGEXP_SYMBOL_OTHERS))
+                ) return(0);
+            break;
+        case XML_REGEXP_NOTSPACE: /* \S */
+            break;
+        case XML_REGEXP_INITNAME: /* \i */
+            /*
+             * can't be a number, mark, separator, punctuation, symbol
+             * or other
+             *
+             * NOTE(review): xmlRegCheckCharacterRange accepts '_' and ':'
+             * for this class, and those look like punctuation characters,
+             * so the PUNCT exclusion below may be too strong - confirm.
+             */
+            if ((type2 == XML_REGEXP_NOTINITNAME) ||
+                ((type2 >= XML_REGEXP_NUMBER) &&
+                 (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
+                ((type2 >= XML_REGEXP_MARK) &&
+                 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
+                ((type2 >= XML_REGEXP_SEPAR) &&
+                 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
+                ((type2 >= XML_REGEXP_PUNCT) &&
+                 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
+                ((type2 >= XML_REGEXP_SYMBOL) &&
+                 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
+                ((type2 >= XML_REGEXP_OTHER) &&
+                 (type2 <= XML_REGEXP_OTHER_NA))
+                ) return(0);
+            break;
+        case XML_REGEXP_NOTINITNAME: /* \I */
+            break;
+        case XML_REGEXP_NAMECHAR: /* \c */
+            /*
+             * can't be a mark, separator, punctuation, symbol or other
+             *
+             * NOTE(review): xmlRegCheckCharacterRange accepts '.', '-',
+             * '_', ':' and combining characters for this class, so the
+             * MARK and PUNCT exclusions below may be too strong - confirm.
+             */
+            if ((type2 == XML_REGEXP_NOTNAMECHAR) ||
+                ((type2 >= XML_REGEXP_MARK) &&
+                 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
+                ((type2 >= XML_REGEXP_PUNCT) &&
+                 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
+                ((type2 >= XML_REGEXP_SEPAR) &&
+                 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
+                ((type2 >= XML_REGEXP_SYMBOL) &&
+                 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
+                ((type2 >= XML_REGEXP_OTHER) &&
+                 (type2 <= XML_REGEXP_OTHER_NA))
+                ) return(0);
+            break;
+        case XML_REGEXP_NOTNAMECHAR: /* \C */
+            break;
+        case XML_REGEXP_DECIMAL: /* \d */
+            /*
+             * can't be a letter, mark, separator, punctuation, symbol
+             * or other.
+             * XML_REGEXP_REALCHAR (\w) is deliberately not listed:
+             * xmlRegCheckCharacterRange accepts for \w everything that is
+             * not punctuation, separator or other, which includes every
+             * decimal digit, so \d and \w do intersect.
+             */
+            if ((type2 == XML_REGEXP_NOTDECIMAL) ||
+                ((type2 >= XML_REGEXP_LETTER) &&
+                 (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
+                ((type2 >= XML_REGEXP_MARK) &&
+                 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
+                ((type2 >= XML_REGEXP_PUNCT) &&
+                 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
+                ((type2 >= XML_REGEXP_SEPAR) &&
+                 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
+                ((type2 >= XML_REGEXP_SYMBOL) &&
+                 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
+                ((type2 >= XML_REGEXP_OTHER) &&
+                 (type2 <= XML_REGEXP_OTHER_NA))
+                ) return(0);
+            break;
+        case XML_REGEXP_NOTDECIMAL: /* \D */
+            break;
+        case XML_REGEXP_REALCHAR: /* \w */
+            /*
+             * can't be \W (its exact complement), a separator,
+             * punctuation or other.
+             * NOTE(review): the MARK and SYMBOL exclusions are kept from
+             * the original code, although xmlRegCheckCharacterRange only
+             * removes the P, Z and C categories from \w - confirm.
+             */
+            if ((type2 == XML_REGEXP_NOTREALCHAR) ||
+                ((type2 >= XML_REGEXP_MARK) &&
+                 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
+                ((type2 >= XML_REGEXP_PUNCT) &&
+                 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
+                ((type2 >= XML_REGEXP_SEPAR) &&
+                 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
+                ((type2 >= XML_REGEXP_SYMBOL) &&
+                 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
+                ((type2 >= XML_REGEXP_OTHER) &&
+                 (type2 <= XML_REGEXP_OTHER_NA))
+                ) return(0);
+            break;
+        case XML_REGEXP_NOTREALCHAR: /* \W */
+            break;
+        /*
+         * at that point we know both type1 and type2 are character
+         * categories, that they are ordered and that they are different;
+         * it becomes simple because the categories form a partition:
+         * a main category only intersects its own subcategories.
+         */
+        case XML_REGEXP_LETTER:
+            if (type2 <= XML_REGEXP_LETTER_OTHERS)
+                return(1);
+            return(0);
+        case XML_REGEXP_LETTER_UPPERCASE:
+        case XML_REGEXP_LETTER_LOWERCASE:
+        case XML_REGEXP_LETTER_TITLECASE:
+        case XML_REGEXP_LETTER_MODIFIER:
+        case XML_REGEXP_LETTER_OTHERS:
+            return(0);
+        case XML_REGEXP_MARK:
+            if (type2 <= XML_REGEXP_MARK_ENCLOSING)
+                return(1);
+            return(0);
+        case XML_REGEXP_MARK_NONSPACING:
+        case XML_REGEXP_MARK_SPACECOMBINING:
+        case XML_REGEXP_MARK_ENCLOSING:
+            return(0);
+        case XML_REGEXP_NUMBER:
+            if (type2 <= XML_REGEXP_NUMBER_OTHERS)
+                return(1);
+            return(0);
+        case XML_REGEXP_NUMBER_DECIMAL:
+        case XML_REGEXP_NUMBER_LETTER:
+        case XML_REGEXP_NUMBER_OTHERS:
+            return(0);
+        case XML_REGEXP_PUNCT:
+            if (type2 <= XML_REGEXP_PUNCT_OTHERS)
+                return(1);
+            return(0);
+        case XML_REGEXP_PUNCT_CONNECTOR:
+        case XML_REGEXP_PUNCT_DASH:
+        case XML_REGEXP_PUNCT_OPEN:
+        case XML_REGEXP_PUNCT_CLOSE:
+        case XML_REGEXP_PUNCT_INITQUOTE:
+        case XML_REGEXP_PUNCT_FINQUOTE:
+        case XML_REGEXP_PUNCT_OTHERS:
+            return(0);
+        case XML_REGEXP_SEPAR:
+            if (type2 <= XML_REGEXP_SEPAR_PARA)
+                return(1);
+            return(0);
+        case XML_REGEXP_SEPAR_SPACE:
+        case XML_REGEXP_SEPAR_LINE:
+        case XML_REGEXP_SEPAR_PARA:
+            return(0);
+        case XML_REGEXP_SYMBOL:
+            if (type2 <= XML_REGEXP_SYMBOL_OTHERS)
+                return(1);
+            return(0);
+        case XML_REGEXP_SYMBOL_MATH:
+        case XML_REGEXP_SYMBOL_CURRENCY:
+        case XML_REGEXP_SYMBOL_MODIFIER:
+        case XML_REGEXP_SYMBOL_OTHERS:
+            return(0);
+        case XML_REGEXP_OTHER:
+            if (type2 <= XML_REGEXP_OTHER_NA)
+                return(1);
+            return(0);
+        case XML_REGEXP_OTHER_CONTROL:
+        case XML_REGEXP_OTHER_FORMAT:
+        case XML_REGEXP_OTHER_PRIVATE:
+        case XML_REGEXP_OTHER_NA:
+            return(0);
+        default:
+            break;
+    }
+    return(1);
+}
+
+/**
+ * xmlFAEqualAtoms:
+ * @atom1: an atom
+ * @atom2: an atom
+ * @deep: if zero, string atoms are compared by pointer only
+ *
+ * Checks whether two atoms are exactly the same; used to drop
+ * equivalent transitions. Only identical pointers, string atoms and
+ * single character atoms can compare equal, every other combination
+ * (epsilon, ranges, character classes...) is reported as different.
+ *
+ * Returns 1 if same and 0 otherwise
+ */
+static int
+xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
+    if (atom1 == atom2)
+        return(1);
+    if ((atom1 == NULL) || (atom2 == NULL) ||
+        (atom1->type != atom2->type))
+        return(0);
+
+    if (atom1->type == XML_REGEXP_STRING) {
+        if (deep)
+            return(xmlStrEqual((xmlChar *)atom1->valuep,
+                               (xmlChar *)atom2->valuep));
+        return(atom1->valuep == atom2->valuep);
+    }
+    if (atom1->type == XML_REGEXP_CHARVAL)
+        return(atom1->codepoint == atom2->codepoint);
+
+    /* epsilon, ranges (too hard in the general case) and the rest */
+    return(0);
+}
+
+/**
+ * xmlFACompareAtoms:
+ * @atom1: an atom
+ * @atom2: an atom
+ * @deep: if zero, string atoms are compared by pointer only
+ *
+ * Checks whether two atoms may accept a common input; this is used by
+ * xmlFAComputesDeterminism and xmlFARecurseDeterminism only. Whenever
+ * the atoms cannot be analysed they are reported as intersecting.
+ *
+ * Returns 1 if yes and 0 otherwise
+ */
+static int
+xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
+    xmlRegAtomPtr low = atom1;
+    xmlRegAtomPtr high = atom2;
+    int overlap = 1;
+
+    if (atom1 == atom2)
+        return(1);
+    if ((atom1 == NULL) || (atom2 == NULL))
+        return(0);
+
+    if ((atom1->type == XML_REGEXP_ANYCHAR) ||
+        (atom2->type == XML_REGEXP_ANYCHAR))
+        return(1);
+
+    /* work on the pair ordered by type: low->type <= high->type */
+    if (atom1->type > atom2->type) {
+        low = atom2;
+        high = atom1;
+    }
+    if (low->type != high->type) {
+        overlap = xmlFACompareAtomTypes(low->type, high->type);
+        /* disjoint at the type level, no need to look further */
+        if (overlap == 0)
+            return(0);
+    }
+
+    if (low->type == XML_REGEXP_STRING) {
+        if (deep)
+            overlap = xmlRegStrEqualWildcard((xmlChar *)low->valuep,
+                                             (xmlChar *)high->valuep);
+        else
+            overlap = (low->valuep != high->valuep);
+    } else if (low->type == XML_REGEXP_CHARVAL) {
+        if (high->type == XML_REGEXP_CHARVAL) {
+            overlap = (low->codepoint == high->codepoint);
+        } else {
+            overlap = xmlRegCheckCharacter(high, low->codepoint);
+            /* could not be checked: assume a conflict */
+            if (overlap < 0)
+                overlap = 1;
+        }
+    } else if (low->type == XML_REGEXP_RANGES) {
+        if (high->type == XML_REGEXP_RANGES) {
+            int i, j;
+
+            /*
+             * the atoms intersect as soon as one pair of ranges does
+             */
+            overlap = 0;
+            for (i = 0;(i < low->nbRanges) && (overlap == 0);i++) {
+                for (j = 0;j < high->nbRanges;j++) {
+                    if (xmlFACompareRanges(low->ranges[i],
+                                           high->ranges[j]) == 1) {
+                        overlap = 1;
+                        break;
+                    }
+                }
+            }
+        }
+    } else {
+        /* epsilon and all remaining types: considered not determinist */
+        return(1);
+    }
+
+    /* exactly one atom negated: invert the verdict */
+    if (low->neg != high->neg)
+        overlap = !overlap;
+
+    return((overlap == 0) ? 0 : 1);
+}
+
+/**
+ * xmlFARecurseDeterminism:
+ * @ctxt: a regexp parser context
+ * @state: the state whose transitions are examined
+ * @to: the target state number of the transition looked at
+ * @atom: the atom of the transition looked at
+ *
+ * Looks, among the transitions of @state and of the states reachable
+ * from it through atom-less transitions, for transitions going to @to
+ * whose atom conflicts with @atom. Conflicting transitions are marked
+ * as non-deterministic (nd = 1).
+ * Should be called after xmlFAEliminateEpsilonTransitions()
+ *
+ * Returns 1 if no conflict was found (or @state is NULL), 0 otherwise
+ */
+static int
+xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
+                        int to, xmlRegAtomPtr atom) {
+    int determinist = 1;
+    int deep;
+    int idx, limit;
+
+    if (state == NULL)
+        return(1);
+
+    deep = (ctxt->flags & AM_AUTOMATA_RNG) ? 0 : 1;
+
+    /*
+     * the number of transitions is sampled once so that transitions
+     * potentially added in the course of the elimination are not visited
+     */
+    limit = state->nbTrans;
+    for (idx = 0;idx < limit;idx++) {
+        xmlRegTransPtr cur = &(state->trans[idx]);
+
+        if (cur->atom != NULL) {
+            /* only transitions reaching the same target can conflict */
+            if ((cur->to == to) &&
+                xmlFACompareAtoms(cur->atom, atom, deep)) {
+                determinist = 0;
+                /* mark the transition as non-deterministic */
+                cur->nd = 1;
+            }
+        } else if (cur->to >= 0) {
+            /* atom-less transition: follow it */
+            if (xmlFARecurseDeterminism(ctxt, ctxt->states[cur->to],
+                                        to, atom) == 0)
+                determinist = 0;
+        }
+    }
+    return(determinist);
+}
+
+/**
+ * xmlFAComputesDeterminism:
+ * @ctxt: a regexp parser context
+ *
+ * Checks whether the automata held by @ctxt is determinist and marks
+ * the conflicting transitions (nd = 1, and nd = 2 for the last
+ * conflicting transition of a state). The verdict is cached in
+ * ctxt->determinist and returned directly on later calls.
+ * Should be called after xmlFAEliminateEpsilonTransitions()
+ *
+ * Returns 1 if the automata is determinist, 0 otherwise
+ */
+static int
+xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
+    int statenr, transnr;
+    xmlRegStatePtr state;
+    xmlRegTransPtr t1, t2, last;
+    int i;
+    int ret = 1;
+    int res;
+    int deep = 1;
+
+#ifdef DEBUG_REGEXP_GRAPH
+    printf("xmlFAComputesDeterminism\n");
+    xmlRegPrintCtxt(stdout, ctxt);
+#endif
+    if (ctxt->determinist != -1)
+        return(ctxt->determinist);
+
+    if (ctxt->flags & AM_AUTOMATA_RNG)
+        deep = 0;
+
+    /*
+     * First cleanup the automata removing cancelled transitions
+     */
+    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
+        state = ctxt->states[statenr];
+        if (state == NULL)
+            continue;
+        if (state->nbTrans < 2)
+            continue;
+        for (transnr = 0;transnr < state->nbTrans;transnr++) {
+            t1 = &(state->trans[transnr]);
+            /*
+             * Determinism checks in case of counted or all transitions
+             * will have to be handled separately
+             */
+            if (t1->atom == NULL) {
+                /* t1->nd = 1; */
+                continue;
+            }
+            if (t1->to == -1) /* eliminated */
+                continue;
+            for (i = 0;i < transnr;i++) {
+                t2 = &(state->trans[i]);
+                if (t2->to == -1) /* eliminated */
+                    continue;
+                if (t2->atom != NULL) {
+                    if (t1->to == t2->to) {
+                        /*
+                         * Here we use deep because we want to keep the
+                         * transitions which indicate a conflict
+                         */
+                        if (xmlFAEqualAtoms(t1->atom, t2->atom, deep) &&
+                            (t1->counter == t2->counter) &&
+                            (t1->count == t2->count))
+                            t2->to = -1; /* eliminated */
+                    }
+                }
+            }
+        }
+    }
+
+    /*
+     * Check for all states that there aren't 2 transitions
+     * with the same atom and a different target.
+     * The computation is never shortcut on the first conflict so that
+     * all non deterministic transitions get marked down.
+     */
+    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
+        state = ctxt->states[statenr];
+        if (state == NULL)
+            continue;
+        if (state->nbTrans < 2)
+            continue;
+        last = NULL;
+        for (transnr = 0;transnr < state->nbTrans;transnr++) {
+            t1 = &(state->trans[transnr]);
+            /*
+             * Determinism checks in case of counted or all transitions
+             * will have to be handled separately
+             */
+            if (t1->atom == NULL) {
+                continue;
+            }
+            if (t1->to == -1) /* eliminated */
+                continue;
+            for (i = 0;i < transnr;i++) {
+                t2 = &(state->trans[i]);
+                if (t2->to == -1) /* eliminated */
+                    continue;
+                if (t2->atom != NULL) {
+                    /*
+                     * The atoms are always compared in depth here (deep
+                     * is forced to 1) because we want to find the
+                     * transitions which indicate a conflict
+                     */
+                    if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) {
+                        ret = 0;
+                        /* mark the transitions as non-deterministic ones */
+                        t1->nd = 1;
+                        t2->nd = 1;
+                        last = t1;
+                    }
+                } else if (t1->to != -1) {
+                    /*
+                     * do the closure in case of remaining specific
+                     * epsilon transitions like choices or all.
+                     * The outcome is kept in a separate variable: a
+                     * clean sub-check must not erase a conflict that
+                     * has already been recorded in ret.
+                     */
+                    res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
+                                                  t2->to, t2->atom);
+                    if (res == 0) {
+                        ret = 0;
+                        t1->nd = 1;
+                        /* t2->nd = 1; */
+                        last = t1;
+                    }
+                }
+            }
+        }
+
+        /*
+         * mark specifically the last non-deterministic transition
+         * from a state since there is no need to set-up rollback
+         * from it
+         */
+        if (last != NULL) {
+            last->nd = 2;
+        }
+    }
+
+    ctxt->determinist = ret;
+    return(ret);
+}
+
+/************************************************************************
+ * *
+ * Routines to check input against transition atoms *
+ * *
+ ************************************************************************/
+/**
+ * xmlRegCheckCharacterRange:
+ * @type: the type of the range or atom to test against
+ * @codepoint: the character to test
+ * @neg: if non-zero the result of the test is inverted
+ * @start: first codepoint of the interval, used for XML_REGEXP_CHARVAL
+ * @end: last codepoint of the interval, used for XML_REGEXP_CHARVAL
+ * @blockName: name handed to xmlUCSIsBlock for XML_REGEXP_BLOCK_NAME
+ *
+ * Tests a single codepoint against one character class.
+ *
+ * Returns the outcome of the membership test, logically inverted if
+ * @neg ends up set, or -1 for the STRING, SUBREG, RANGES and EPSILON
+ * types which cannot be tested here
+ */
+static int
+xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
+ int start, int end, const xmlChar *blockName) {
+ int ret = 0;
+
+ switch (type) {
+ case XML_REGEXP_STRING:
+ case XML_REGEXP_SUBREG:
+ case XML_REGEXP_RANGES:
+ case XML_REGEXP_EPSILON:
+ return(-1);
+ case XML_REGEXP_ANYCHAR:
+ ret = ((codepoint != '\n') && (codepoint != '\r'));
+ break;
+ case XML_REGEXP_CHARVAL:
+ ret = ((codepoint >= start) && (codepoint <= end));
+ break;
+ case XML_REGEXP_NOTSPACE:
+ neg = !neg; /* negated class: flip neg and fall through to the positive test */
+ case XML_REGEXP_ANYSPACE:
+ ret = ((codepoint == '\n') || (codepoint == '\r') ||
+ (codepoint == '\t') || (codepoint == ' '));
+ break;
+ case XML_REGEXP_NOTINITNAME:
+ neg = !neg; /* fall through */
+ case XML_REGEXP_INITNAME:
+ ret = (xmlIsLetter(codepoint) ||
+ (codepoint == '_') || (codepoint == ':'));
+ break;
+ case XML_REGEXP_NOTNAMECHAR:
+ neg = !neg; /* fall through */
+ case XML_REGEXP_NAMECHAR:
+ ret = (xmlIsLetter(codepoint) || xmlIsDigit(codepoint) ||
+ (codepoint == '.') || (codepoint == '-') ||
+ (codepoint == '_') || (codepoint == ':') ||
+ xmlIsCombining(codepoint) || xmlIsExtender(codepoint));
+ break;
+ case XML_REGEXP_NOTDECIMAL:
+ neg = !neg; /* fall through */
+ case XML_REGEXP_DECIMAL:
+ ret = xmlUCSIsCatNd(codepoint);
+ break;
+ case XML_REGEXP_REALCHAR:
+ neg = !neg; /* REALCHAR is tested as the negation of NOTREALCHAR: fall through */
+ case XML_REGEXP_NOTREALCHAR:
+ ret = xmlUCSIsCatP(codepoint);
+ if (ret == 0)
+ ret = xmlUCSIsCatZ(codepoint);
+ if (ret == 0)
+ ret = xmlUCSIsCatC(codepoint);
+ break;
+ case XML_REGEXP_LETTER:
+ ret = xmlUCSIsCatL(codepoint);
+ break;
+ case XML_REGEXP_LETTER_UPPERCASE:
+ ret = xmlUCSIsCatLu(codepoint);
+ break;
+ case XML_REGEXP_LETTER_LOWERCASE:
+ ret = xmlUCSIsCatLl(codepoint);
+ break;
+ case XML_REGEXP_LETTER_TITLECASE:
+ ret = xmlUCSIsCatLt(codepoint);
+ break;
+ case XML_REGEXP_LETTER_MODIFIER:
+ ret = xmlUCSIsCatLm(codepoint);
+ break;
+ case XML_REGEXP_LETTER_OTHERS:
+ ret = xmlUCSIsCatLo(codepoint);
+ break;
+ case XML_REGEXP_MARK:
+ ret = xmlUCSIsCatM(codepoint);
+ break;
+ case XML_REGEXP_MARK_NONSPACING:
+ ret = xmlUCSIsCatMn(codepoint);
+ break;
+ case XML_REGEXP_MARK_SPACECOMBINING:
+ ret = xmlUCSIsCatMc(codepoint);
+ break;
+ case XML_REGEXP_MARK_ENCLOSING:
+ ret = xmlUCSIsCatMe(codepoint);
+ break;
+ case XML_REGEXP_NUMBER:
+ ret = xmlUCSIsCatN(codepoint);
+ break;
+ case XML_REGEXP_NUMBER_DECIMAL:
+ ret = xmlUCSIsCatNd(codepoint);
+ break;
+ case XML_REGEXP_NUMBER_LETTER:
+ ret = xmlUCSIsCatNl(codepoint);
+ break;
+ case XML_REGEXP_NUMBER_OTHERS:
+ ret = xmlUCSIsCatNo(codepoint);
+ break;
+ case XML_REGEXP_PUNCT:
+ ret = xmlUCSIsCatP(codepoint);
+ break;
+ case XML_REGEXP_PUNCT_CONNECTOR:
+ ret = xmlUCSIsCatPc(codepoint);
+ break;
+ case XML_REGEXP_PUNCT_DASH:
+ ret = xmlUCSIsCatPd(codepoint);
+ break;
+ case XML_REGEXP_PUNCT_OPEN:
+ ret = xmlUCSIsCatPs(codepoint);
+ break;
+ case XML_REGEXP_PUNCT_CLOSE:
+ ret = xmlUCSIsCatPe(codepoint);
+ break;
+ case XML_REGEXP_PUNCT_INITQUOTE:
+ ret = xmlUCSIsCatPi(codepoint);
+ break;
+ case XML_REGEXP_PUNCT_FINQUOTE:
+ ret = xmlUCSIsCatPf(codepoint);
+ break;
+ case XML_REGEXP_PUNCT_OTHERS:
+ ret = xmlUCSIsCatPo(codepoint);
+ break;
+ case XML_REGEXP_SEPAR:
+ ret = xmlUCSIsCatZ(codepoint);
+ break;
+ case XML_REGEXP_SEPAR_SPACE:
+ ret = xmlUCSIsCatZs(codepoint);
+ break;
+ case XML_REGEXP_SEPAR_LINE:
+ ret = xmlUCSIsCatZl(codepoint);
+ break;
+ case XML_REGEXP_SEPAR_PARA:
+ ret = xmlUCSIsCatZp(codepoint);
+ break;
+ case XML_REGEXP_SYMBOL:
+ ret = xmlUCSIsCatS(codepoint);
+ break;
+ case XML_REGEXP_SYMBOL_MATH:
+ ret = xmlUCSIsCatSm(codepoint);
+ break;
+ case XML_REGEXP_SYMBOL_CURRENCY:
+ ret = xmlUCSIsCatSc(codepoint);
+ break;
+ case XML_REGEXP_SYMBOL_MODIFIER:
+ ret = xmlUCSIsCatSk(codepoint);
+ break;
+ case XML_REGEXP_SYMBOL_OTHERS:
+ ret = xmlUCSIsCatSo(codepoint);
+ break;
+ case XML_REGEXP_OTHER:
+ ret = xmlUCSIsCatC(codepoint);
+ break;
+ case XML_REGEXP_OTHER_CONTROL:
+ ret = xmlUCSIsCatCc(codepoint);
+ break;
+ case XML_REGEXP_OTHER_FORMAT:
+ ret = xmlUCSIsCatCf(codepoint);
+ break;
+ case XML_REGEXP_OTHER_PRIVATE:
+ ret = xmlUCSIsCatCo(codepoint);
+ break;
+ case XML_REGEXP_OTHER_NA:
+ /* ret = xmlUCSIsCatCn(codepoint); */
+ /* Seems it doesn't exist anymore in recent Unicode releases */
+ ret = 0;
+ break;
+ case XML_REGEXP_BLOCK_NAME:
+ ret = xmlUCSIsBlock(codepoint, (const char *) blockName);
+ break;
+ }
+ if (neg)
+ return(!ret);
+ return(ret);
+}
+/**
+ * xmlRegCheckCharacter:
+ * @atom: the atom to test against
+ * @codepoint: the character to test
+ *
+ * Tests whether a single codepoint is accepted by an atom.
+ *
+ * Returns 0 if the codepoint is rejected, a non-zero test result if it
+ * is accepted, and -1 when no test is possible: @atom is NULL,
+ * @codepoint fails xmlIsChar(), or the atom is of type SUBREG, EPSILON
+ * or STRING
+ */
+static int
+xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {
+ int i, ret = 0;
+ xmlRegRangePtr range;
+
+ if ((atom == NULL) || (!xmlIsChar(codepoint)))
+ return(-1);
+
+ switch (atom->type) {
+ case XML_REGEXP_SUBREG:
+ case XML_REGEXP_EPSILON:
+ return(-1);
+ case XML_REGEXP_CHARVAL:
+ return(codepoint == atom->codepoint);
+ case XML_REGEXP_RANGES: {
+ int accept = 0;
+
+ for (i = 0;i < atom->nbRanges;i++) {
+ range = atom->ranges[i];
+ if (range->neg == 2) {
+ ret = xmlRegCheckCharacterRange(range->type, codepoint,
+ 0, range->start, range->end,
+ range->blockName);
+ if (ret != 0)
+ return(0); /* excluded char */
+ } else if (range->neg) { /* negated range: a miss accepts, a hit rejects at once */
+ ret = xmlRegCheckCharacterRange(range->type, codepoint,
+ 0, range->start, range->end,
+ range->blockName);
+ if (ret == 0)
+ accept = 1;
+ else
+ return(0);
+ } else {
+ ret = xmlRegCheckCharacterRange(range->type, codepoint,
+ 0, range->start, range->end,
+ range->blockName);
+ if (ret != 0)
+ accept = 1; /* might still be excluded */
+ }
+ }
+ return(accept);
+ }
+ case XML_REGEXP_STRING:
+ /*TODO XML_REGEXP_STRING */
+ return(-1);
+ case XML_REGEXP_ANYCHAR:
+ case XML_REGEXP_ANYSPACE:
+ case XML_REGEXP_NOTSPACE:
+ case XML_REGEXP_INITNAME:
+ case XML_REGEXP_NOTINITNAME:
+ case XML_REGEXP_NAMECHAR:
+ case XML_REGEXP_NOTNAMECHAR:
+ case XML_REGEXP_DECIMAL:
+ case XML_REGEXP_NOTDECIMAL:
+ case XML_REGEXP_REALCHAR:
+ case XML_REGEXP_NOTREALCHAR:
+ case XML_REGEXP_LETTER:
+ case XML_REGEXP_LETTER_UPPERCASE:
+ case XML_REGEXP_LETTER_LOWERCASE:
+ case XML_REGEXP_LETTER_TITLECASE:
+ case XML_REGEXP_LETTER_MODIFIER:
+ case XML_REGEXP_LETTER_OTHERS:
+ case XML_REGEXP_MARK:
+ case XML_REGEXP_MARK_NONSPACING:
+ case XML_REGEXP_MARK_SPACECOMBINING:
+ case XML_REGEXP_MARK_ENCLOSING:
+ case XML_REGEXP_NUMBER:
+ case XML_REGEXP_NUMBER_DECIMAL:
+ case XML_REGEXP_NUMBER_LETTER:
+ case XML_REGEXP_NUMBER_OTHERS:
+ case XML_REGEXP_PUNCT:
+ case XML_REGEXP_PUNCT_CONNECTOR:
+ case XML_REGEXP_PUNCT_DASH:
+ case XML_REGEXP_PUNCT_OPEN:
+ case XML_REGEXP_PUNCT_CLOSE:
+ case XML_REGEXP_PUNCT_INITQUOTE:
+ case XML_REGEXP_PUNCT_FINQUOTE:
+ case XML_REGEXP_PUNCT_OTHERS:
+ case XML_REGEXP_SEPAR:
+ case XML_REGEXP_SEPAR_SPACE:
+ case XML_REGEXP_SEPAR_LINE:
+ case XML_REGEXP_SEPAR_PARA:
+ case XML_REGEXP_SYMBOL:
+ case XML_REGEXP_SYMBOL_MATH:
+ case XML_REGEXP_SYMBOL_CURRENCY:
+ case XML_REGEXP_SYMBOL_MODIFIER:
+ case XML_REGEXP_SYMBOL_OTHERS:
+ case XML_REGEXP_OTHER:
+ case XML_REGEXP_OTHER_CONTROL:
+ case XML_REGEXP_OTHER_FORMAT:
+ case XML_REGEXP_OTHER_PRIVATE:
+ case XML_REGEXP_OTHER_NA:
+ case XML_REGEXP_BLOCK_NAME:
+ ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,
+ (const xmlChar *)atom->valuep); /* valuep is only used as the block name for BLOCK_NAME */
+ if (atom->neg)
+ ret = !ret;
+ break;
+ }
+ return(ret);
+}
+
+/************************************************************************
+ * *
+ * Saving and restoring state of an execution context *
+ * *
+ ************************************************************************/
+
+#ifdef DEBUG_REGEXP_EXEC
+/*
+ * Debug helper: prints the current state number, transition number and
+ * input index of an execution context, followed either by the (at most
+ * three) most recent entries of the input stack or, when there is no
+ * input stack, by the input string from the current index on.
+ */
+static void
+xmlFARegDebugExec(xmlRegExecCtxtPtr exec) {
+    printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index);
+    if (exec->inputStack == NULL) {
+        printf(": %s", &(exec->inputString[exec->index]));
+    } else {
+        int shown = 0;
+        int pos = exec->inputStackNr - 1;
+
+        printf(": ");
+        /* walk down from the top of the stack */
+        while ((shown < 3) && (pos >= 0)) {
+            printf("%s ", (const char *) exec->inputStack[pos].value);
+            shown++;
+            pos--;
+        }
+    }
+    printf("\n");
+}
+#endif
+
+/*
+ * xmlFARegExecSave:
+ * @exec: a regexp execution context
+ *
+ * Pushes the current execution point (state, input index, next
+ * transition to try and, when the regexp has counters, a copy of the
+ * counter values) on the rollback stack, growing the stack as needed.
+ *
+ * On any allocation failure nothing is pushed and exec->status is set
+ * to -5, so that the caller stops instead of silently carrying on
+ * without the rollback point. When MAX_PUSH is defined and exceeded the
+ * save is skipped without raising an error.
+ */
+static void
+xmlFARegExecSave(xmlRegExecCtxtPtr exec) {
+#ifdef DEBUG_REGEXP_EXEC
+    printf("saving ");
+    exec->transno++;
+    xmlFARegDebugExec(exec);
+    exec->transno--;
+#endif
+#ifdef MAX_PUSH
+    if (exec->nbPush > MAX_PUSH) {
+        return;
+    }
+    exec->nbPush++;
+#endif
+
+    if (exec->maxRollbacks == 0) {
+        /* first save: create the stack */
+        exec->maxRollbacks = 4;
+        exec->rollbacks = (xmlRegExecRollback *) REGEXP_MALLOC(exec->maxRollbacks *
+                                             sizeof(xmlRegExecRollback));
+        if (exec->rollbacks == NULL) {
+            xmlRegexpErrMemory(NULL, "saving regexp");
+            exec->maxRollbacks = 0;
+            /* same status as the counters allocation failure below */
+            exec->status = -5;
+            return;
+        }
+        memset(exec->rollbacks, 0,
+               exec->maxRollbacks * sizeof(xmlRegExecRollback));
+    } else if (exec->nbRollbacks >= exec->maxRollbacks) {
+        /* stack full: double its size */
+        xmlRegExecRollback *tmp;
+        int len = exec->maxRollbacks;
+
+        exec->maxRollbacks *= 2;
+        tmp = (xmlRegExecRollback *) REGEXP_REALLOC(exec->rollbacks,
+                        exec->maxRollbacks * sizeof(xmlRegExecRollback));
+        if (tmp == NULL) {
+            xmlRegexpErrMemory(NULL, "saving regexp");
+            /* the old stack is still valid, restore its size */
+            exec->maxRollbacks /= 2;
+            exec->status = -5;
+            return;
+        }
+        exec->rollbacks = tmp;
+        /* zero the new half so that its counts pointers start as NULL */
+        tmp = &exec->rollbacks[len];
+        memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback));
+    }
+    exec->rollbacks[exec->nbRollbacks].state = exec->state;
+    exec->rollbacks[exec->nbRollbacks].index = exec->index;
+    exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1;
+    if (exec->comp->nbCounters > 0) {
+        /* the counts buffer of a slot is allocated on first use only */
+        if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
+            exec->rollbacks[exec->nbRollbacks].counts = (int *)
+                REGEXP_MALLOC(exec->comp->nbCounters * sizeof(int));
+            if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
+                xmlRegexpErrMemory(NULL, "saving regexp");
+                exec->status = -5;
+                return;
+            }
+        }
+        memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts,
+               exec->comp->nbCounters * sizeof(int));
+    }
+    exec->nbRollbacks++;
+}
+
+/*
+ * xmlFARegExecRollBack:
+ * @exec: a regexp execution context
+ *
+ * Pops the most recent entry of the rollback stack and restores the
+ * state, input index, next transition and (when the regexp has
+ * counters) counter values it holds. exec->status is set to -1 if the
+ * stack is empty and to -6 if the entry has no saved counters although
+ * the regexp uses some.
+ */
+static void
+xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
+    xmlRegExecRollback *saved;
+
+    if (exec->nbRollbacks <= 0) {
+        exec->status = -1;
+#ifdef DEBUG_REGEXP_EXEC
+        printf("rollback failed on empty stack\n");
+#endif
+        return;
+    }
+
+    exec->nbRollbacks--;
+    saved = &exec->rollbacks[exec->nbRollbacks];
+
+    exec->state = saved->state;
+    exec->index = saved->index;
+    exec->transno = saved->nextbranch;
+    if (exec->comp->nbCounters > 0) {
+        if (saved->counts == NULL) {
+#ifdef DEBUG_REGEXP_EXEC
+            fprintf(stderr, "exec save: allocation failed");
+#endif
+            exec->status = -6;
+            return;
+        }
+        memcpy(exec->counts, saved->counts,
+               exec->comp->nbCounters * sizeof(int));
+    }
+
+#ifdef DEBUG_REGEXP_EXEC
+    printf("restored ");
+    xmlFARegDebugExec(exec);
+#endif
+}
+
+/************************************************************************
+ * *
+ * Verifier, running an input against a compiled regexp *
+ * *
+ ************************************************************************/
+
+static int
+xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
+ xmlRegExecCtxt execval;
+ xmlRegExecCtxtPtr exec = &execval;
+ int ret, codepoint = 0, len, deter;
+
+ exec->inputString = content;
+ exec->index = 0;
+ exec->nbPush = 0;
+ exec->determinist = 1;
+ exec->maxRollbacks = 0;
+ exec->nbRollbacks = 0;
+ exec->rollbacks = NULL;
+ exec->status = 0;
+ exec->comp = comp;
+ exec->state = comp->states[0];
+ exec->transno = 0;
+ exec->transcount = 0;
+ exec->inputStack = NULL;
+ exec->inputStackMax = 0;
+ if (comp->nbCounters > 0) {
+ exec->counts = (int *) REGEXP_MALLOC(comp->nbCounters * sizeof(int));
+ if (exec->counts == NULL) {
+ xmlRegexpErrMemory(NULL, "running regexp");
+ return(-1);
+ }
+ memset(exec->counts, 0, comp->nbCounters * sizeof(int));
+ } else
+ exec->counts = NULL;
+ while ((exec->status == 0) &&
+ ((exec->inputString[exec->index] != 0) ||
+ ((exec->state != NULL) &&
+ (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
+ xmlRegTransPtr trans;
+ xmlRegAtomPtr atom;
+
+ /*
+ * If end of input on non-terminal state, rollback, however we may
+ * still have epsilon like transition for counted transitions
+ * on counters, in that case don't break too early. Additionally,
+ * if we are working on a range like "AB{0,2}", where B is not present,
+ * we don't want to break.
+ */
+ len = 1;
+ if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) {
+ /*
+ * if there is a transition, we must check if
+ * atom allows minOccurs of 0
+ */
+ if (exec->transno < exec->state->nbTrans) {
+ trans = &exec->state->trans[exec->transno];
+ if (trans->to >=0) {
+ atom = trans->atom;
+ if (!((atom->min == 0) && (atom->max > 0)))
+ goto rollback;
+ }
+ } else
+ goto rollback;
+ }
+
+ exec->transcount = 0;
+ for (;exec->transno < exec->state->nbTrans;exec->transno++) {
+ trans = &exec->state->trans[exec->transno];
+ if (trans->to < 0)
+ continue;
+ atom = trans->atom;
+ ret = 0;
+ deter = 1;
+ if (trans->count >= 0) {
+ int count;
+ xmlRegCounterPtr counter;
+
+ if (exec->counts == NULL) {
+ exec->status = -1;
+ goto error;
+ }
+ /*
+ * A counted transition.
+ */
+
+ count = exec->counts[trans->count];
+ counter = &exec->comp->counters[trans->count];
+#ifdef DEBUG_REGEXP_EXEC
+ printf("testing count %d: val %d, min %d, max %d\n",
+ trans->count, count, counter->min, counter->max);
+#endif
+ ret = ((count >= counter->min) && (count <= counter->max));
+ if ((ret) && (counter->min != counter->max))
+ deter = 0;
+ } else if (atom == NULL) {
+#ifdef DEBUG_REGEXP_EXEC
+ fprintf(stderr, "epsilon transition left at runtime\n");
+#endif
+ exec->status = -2;
+ break;
+ } else if (exec->inputString[exec->index] != 0) {
+ codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
+ ret = xmlRegCheckCharacter(atom, codepoint);
+ if ((ret == 1) && (atom->min >= 0) && (atom->max > 0)) {
+ xmlRegStatePtr to = comp->states[trans->to];
+
+ /*
+ * this is a multiple input sequence
+ * If there is a counter associated increment it now.
+ * before potentially saving and rollback
+ * do not increment if the counter is already over the
+ * maximum limit in which case get to next transition
+ */
+ if (trans->counter >= 0) {
+ xmlRegCounterPtr counter;
+
+ if ((exec->counts == NULL) ||
+ (exec->comp == NULL) ||
+ (exec->comp->counters == NULL)) {
+ exec->status = -1;
+ goto error;
+ }
+ counter = &exec->comp->counters[trans->counter];
+ if (exec->counts[trans->counter] >= counter->max)
+ continue; /* for loop on transitions */
+
+#ifdef DEBUG_REGEXP_EXEC
+ printf("Increasing count %d\n", trans->counter);
+#endif
+ exec->counts[trans->counter]++;
+ }
+ if (exec->state->nbTrans > exec->transno + 1) {
+ xmlFARegExecSave(exec);
+ }
+ exec->transcount = 1;
+ do {
+ /*
+ * Try to progress as much as possible on the input
+ */
+ if (exec->transcount == atom->max) {
+ break;
+ }
+ exec->index += len;
+ /*
+ * End of input: stop here
+ */
+ if (exec->inputString[exec->index] == 0) {
+ exec->index -= len;
+ break;
+ }
+ if (exec->transcount >= atom->min) {
+ int transno = exec->transno;
+ xmlRegStatePtr state = exec->state;
+
+ /*
+ * The transition is acceptable save it
+ */
+ exec->transno = -1; /* trick */
+ exec->state = to;
+ xmlFARegExecSave(exec);
+ exec->transno = transno;
+ exec->state = state;
+ }
+ codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
+ len);
+ ret = xmlRegCheckCharacter(atom, codepoint);
+ exec->transcount++;
+ } while (ret == 1);
+ if (exec->transcount < atom->min)
+ ret = 0;
+
+ /*
+ * If the last check failed but one transition was found
+ * possible, rollback
+ */
+ if (ret < 0)
+ ret = 0;
+ if (ret == 0) {
+ goto rollback;
+ }
+ if (trans->counter >= 0) {
+ if (exec->counts == NULL) {
+ exec->status = -1;
+ goto error;
+ }
+#ifdef DEBUG_REGEXP_EXEC
+ printf("Decreasing count %d\n", trans->counter);
+#endif
+ exec->counts[trans->counter]--;
+ }
+ } else if ((ret == 0) && (atom->min == 0) && (atom->max > 0)) {
+ /*
+ * we don't match on the codepoint, but minOccurs of 0
+ * says that's ok. Setting len to 0 inhibits stepping
+ * over the codepoint.
+ */
+ exec->transcount = 1;
+ len = 0;
+ ret = 1;
+ }
+ } else if ((atom->min == 0) && (atom->max > 0)) {
+ /* another spot to match when minOccurs is 0 */
+ exec->transcount = 1;
+ len = 0;
+ ret = 1;
+ }
+ if (ret == 1) {
+ if ((trans->nd == 1) ||
+ ((trans->count >= 0) && (deter == 0) &&
+ (exec->state->nbTrans > exec->transno + 1))) {
+#ifdef DEBUG_REGEXP_EXEC
+ if (trans->nd == 1)
+ printf("Saving on nd transition atom %d for %c at %d\n",
+ trans->atom->no, codepoint, exec->index);
+ else
+ printf("Saving on counted transition count %d for %c at %d\n",
+ trans->count, codepoint, exec->index);
+#endif
+ xmlFARegExecSave(exec);
+ }
+ if (trans->counter >= 0) {
+ xmlRegCounterPtr counter;
+
+ /* make sure we don't go over the counter maximum value */
+ if ((exec->counts == NULL) ||
+ (exec->comp == NULL) ||
+ (exec->comp->counters == NULL)) {
+ exec->status = -1;
+ goto error;
+ }
+ counter = &exec->comp->counters[trans->counter];
+ if (exec->counts[trans->counter] >= counter->max)
+ continue; /* for loop on transitions */
+#ifdef DEBUG_REGEXP_EXEC
+ printf("Increasing count %d\n", trans->counter);
+#endif
+ exec->counts[trans->counter]++;
+ }
+ if ((trans->count >= 0) &&
+ (trans->count < REGEXP_ALL_COUNTER)) {
+ if (exec->counts == NULL) {
+ exec->status = -1;
+ goto error;
+ }
+#ifdef DEBUG_REGEXP_EXEC
+ printf("resetting count %d on transition\n",
+ trans->count);
+#endif
+ exec->counts[trans->count] = 0;
+ }
+#ifdef DEBUG_REGEXP_EXEC
+ printf("entering state %d\n", trans->to);
+#endif
+ exec->state = comp->states[trans->to];
+ exec->transno = 0;
+ if (trans->atom != NULL) {
+ exec->index += len;
+ }
+ goto progress;
+ } else if (ret < 0) {
+ exec->status = -4;
+ break;
+ }
+ }
+ if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
+rollback:
+ /*
+ * Failed to find a way out
+ */
+ exec->determinist = 0;
+#ifdef DEBUG_REGEXP_EXEC
+ printf("rollback from state %d on %d:%c\n", exec->state->no,
+ codepoint,codepoint);
+#endif
+ xmlFARegExecRollBack(exec);
+ }
+progress:
+ continue;
+ }
+error:
+ if (exec->rollbacks != NULL) {
+ if (exec->counts != NULL) {
+ int i;
+
+ for (i = 0;i < exec->maxRollbacks;i++)
+ if (exec->rollbacks[i].counts != NULL)
+ REGEXP_FREE(exec->rollbacks[i].counts);
+ }
+ REGEXP_FREE(exec->rollbacks);
+ }
+ if (exec->counts != NULL)
+ REGEXP_FREE(exec->counts);
+ if (exec->status == 0)
+ return(1);
+ if (exec->status == -1) {
+ if (exec->nbPush > MAX_PUSH)
+ return(-1);
+ return(0);
+ }
+ return(exec->status);
+}
+
+/************************************************************************
+ * *
+ * Progressive interface to the verifier one atom at a time *
+ * *
+ ************************************************************************/
+
+/**
+ * xmlRegStrEqualWildcard:
+ * @expStr: the string to be evaluated
+ * @valStr: the validation string
+ *
+ * Checks if both strings are equal or have the same content. "*"
+ * can be used as a wildcard; XML_REG_STRING_SEPARATOR is used as a
+ * separator of substrings in both @expStr and @valStr. A wildcard
+ * swallows everything up to the next separator (or the end of the
+ * string) on the other side.
+ *
+ * Two NULL pointers (or the same pointer) compare equal; a single NULL
+ * never matches. Two distinct empty strings compare equal.
+ *
+ * Returns 1 if the comparison is satisfied and the number of substrings
+ * is equal, 0 otherwise.
+ */
+
+static int
+xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr) {
+    if (expStr == valStr) return(1);
+    if (expStr == NULL) return(0);
+    if (valStr == NULL) return(0);
+    /*
+     * The do/while below always consumes one character from each string
+     * before testing for the terminator, so an empty @valStr must be
+     * handled here or the loop would step past both terminating NULs.
+     */
+    if (*valStr == 0)
+        return(*expStr == 0);
+    do {
+        /*
+         * Eval if we have a wildcard for the current item.
+         */
+        if (*expStr != *valStr) {
+            /* if valStr starts with a wildcard, swap so expStr holds it */
+            if (*valStr == '*') {
+                const xmlChar *tmp;
+
+                tmp = valStr;
+                valStr = expStr;
+                expStr = tmp;
+            }
+            if ((*valStr != 0) && (*expStr != 0) && (*expStr++ == '*')) {
+                /* skip the item matched by the wildcard */
+                do {
+                    if (*valStr == XML_REG_STRING_SEPARATOR)
+                        break;
+                    valStr++;
+                } while (*valStr != 0);
+                continue;
+            } else
+                return(0);
+        }
+        expStr++;
+        valStr++;
+    } while (*valStr != 0);
+    if (*expStr != 0)
+        return (0);
+    else
+        return (1);
+}
+
+/************************************************************************
+ * *
+ * Parser for the Schemas Datatype Regular Expressions *
+ * http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs *
+ * *
+ ************************************************************************/
+
+/**
+ * xmlFAIsChar:
+ * @ctxt: a regexp parser context
+ *
+ * [10] Char ::= [^.\?*+()|#x5B#x5D]
+ *
+ * Decodes the character at the current parsing position without
+ * consuming it.
+ *
+ * Returns the decoded character if it is a plain Char, or -1 if it is
+ * one of the metacharacters above or the end of the pattern.
+ */
+static int
+xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
+    int codepoint;
+    int len;
+
+    codepoint = CUR_SCHAR(ctxt->cur, len);
+    switch (codepoint) {
+        case 0:
+        case '.':
+        case '\\':
+        case '?':
+        case '*':
+        case '+':
+        case '(':
+        case ')':
+        case '|':
+        case 0x5B:
+        case 0x5D:
+            return(-1);
+        default:
+            break;
+    }
+    return(codepoint);
+}
+
+/**
+ * xmlFAParseCharProp:
+ * @ctxt: a regexp parser context
+ *
+ * [27] charProp ::= IsCategory | IsBlock
+ * [28] IsCategory ::= Letters | Marks | Numbers | Punctuation |
+ * Separators | Symbols | Others
+ * [29] Letters ::= 'L' [ultmo]?
+ * [30] Marks ::= 'M' [nce]?
+ * [31] Numbers ::= 'N' [dlo]?
+ * [32] Punctuation ::= 'P' [cdseifo]?
+ * [33] Separators ::= 'Z' [slp]?
+ * [34] Symbols ::= 'S' [mcko]?
+ * [35] Others ::= 'C' [cfon]?
+ * [36] IsBlock ::= 'Is' [a-zA-Z0-9#x2D]+
+ *
+ * Consumes the property name and records it: when no atom is pending a
+ * new atom of the matching type is created, when the pending atom is a
+ * XML_REGEXP_RANGES atom the property is added to it as a range. An
+ * unknown property raises a compile error and records nothing.
+ */
+static void
+xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
+    int cur;
+    xmlRegAtomType type = (xmlRegAtomType) 0;
+    xmlChar *blockName = NULL;
+
+    cur = CUR;
+    switch (cur) {
+        case 'L':                       /* letters */
+            NEXT;
+            switch (CUR) {
+                case 'u': NEXT; type = XML_REGEXP_LETTER_UPPERCASE; break;
+                case 'l': NEXT; type = XML_REGEXP_LETTER_LOWERCASE; break;
+                case 't': NEXT; type = XML_REGEXP_LETTER_TITLECASE; break;
+                case 'm': NEXT; type = XML_REGEXP_LETTER_MODIFIER; break;
+                case 'o': NEXT; type = XML_REGEXP_LETTER_OTHERS; break;
+                default: type = XML_REGEXP_LETTER; break;
+            }
+            break;
+        case 'M':                       /* marks */
+            NEXT;
+            switch (CUR) {
+                case 'n': NEXT; type = XML_REGEXP_MARK_NONSPACING; break;
+                case 'c': NEXT; type = XML_REGEXP_MARK_SPACECOMBINING; break;
+                case 'e': NEXT; type = XML_REGEXP_MARK_ENCLOSING; break;
+                default: type = XML_REGEXP_MARK; break;
+            }
+            break;
+        case 'N':                       /* numbers */
+            NEXT;
+            switch (CUR) {
+                case 'd': NEXT; type = XML_REGEXP_NUMBER_DECIMAL; break;
+                case 'l': NEXT; type = XML_REGEXP_NUMBER_LETTER; break;
+                case 'o': NEXT; type = XML_REGEXP_NUMBER_OTHERS; break;
+                default: type = XML_REGEXP_NUMBER; break;
+            }
+            break;
+        case 'P':                       /* punctuation */
+            NEXT;
+            switch (CUR) {
+                case 'c': NEXT; type = XML_REGEXP_PUNCT_CONNECTOR; break;
+                case 'd': NEXT; type = XML_REGEXP_PUNCT_DASH; break;
+                case 's': NEXT; type = XML_REGEXP_PUNCT_OPEN; break;
+                case 'e': NEXT; type = XML_REGEXP_PUNCT_CLOSE; break;
+                case 'i': NEXT; type = XML_REGEXP_PUNCT_INITQUOTE; break;
+                case 'f': NEXT; type = XML_REGEXP_PUNCT_FINQUOTE; break;
+                case 'o': NEXT; type = XML_REGEXP_PUNCT_OTHERS; break;
+                default: type = XML_REGEXP_PUNCT; break;
+            }
+            break;
+        case 'Z':                       /* separators */
+            NEXT;
+            switch (CUR) {
+                case 's': NEXT; type = XML_REGEXP_SEPAR_SPACE; break;
+                case 'l': NEXT; type = XML_REGEXP_SEPAR_LINE; break;
+                case 'p': NEXT; type = XML_REGEXP_SEPAR_PARA; break;
+                default: type = XML_REGEXP_SEPAR; break;
+            }
+            break;
+        case 'S':                       /* symbols */
+            NEXT;
+            switch (CUR) {
+                case 'm': NEXT; type = XML_REGEXP_SYMBOL_MATH; break;
+                case 'c': NEXT; type = XML_REGEXP_SYMBOL_CURRENCY; break;
+                case 'k': NEXT; type = XML_REGEXP_SYMBOL_MODIFIER; break;
+                case 'o': NEXT; type = XML_REGEXP_SYMBOL_OTHERS; break;
+                default: type = XML_REGEXP_SYMBOL; break;
+            }
+            break;
+        case 'C':                       /* others */
+            NEXT;
+            switch (CUR) {
+                case 'c': NEXT; type = XML_REGEXP_OTHER_CONTROL; break;
+                case 'f': NEXT; type = XML_REGEXP_OTHER_FORMAT; break;
+                case 'o': NEXT; type = XML_REGEXP_OTHER_PRIVATE; break;
+                case 'n': NEXT; type = XML_REGEXP_OTHER_NA; break;
+                default: type = XML_REGEXP_OTHER; break;
+            }
+            break;
+        case 'I': {                     /* IsBlock */
+            const xmlChar *nameStart;
+
+            NEXT;
+            if (CUR != 's') {
+                REGEXP_ERROR("IsXXXX expected");
+                return;
+            }
+            NEXT;
+            nameStart = ctxt->cur;
+            /* swallow the run of [a-zA-Z0-9-] forming the block name */
+            cur = CUR;
+            while (((cur >= 'a') && (cur <= 'z')) ||
+                   ((cur >= 'A') && (cur <= 'Z')) ||
+                   ((cur >= '0') && (cur <= '9')) ||
+                   (cur == 0x2D)) {
+                NEXT;
+                cur = CUR;
+            }
+            type = XML_REGEXP_BLOCK_NAME;
+            blockName = xmlStrndup(nameStart, ctxt->cur - nameStart);
+            break;
+        }
+        default:
+            REGEXP_ERROR("Unknown char property");
+            return;
+    }
+
+    if (ctxt->atom == NULL) {
+        /* standalone \p{..}: the property becomes its own atom */
+        ctxt->atom = xmlRegNewAtom(ctxt, type);
+        if (ctxt->atom != NULL)
+            ctxt->atom->valuep = blockName;
+    } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
+        /* inside [...]: the property is one more range of the class */
+        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
+                           type, 0, 0, blockName);
+    }
+}
+
+/**
+ * xmlFAParseCharClassEsc:
+ * @ctxt: a regexp parser context
+ *
+ * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc )
+ * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E]
+ * [25] catEsc ::= '\p{' charProp '}'
+ * [26] complEsc ::= '\P{' charProp '}'
+ * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW])
+ *
+ * Consumes one escape (or a lone '.'). The result either becomes a new
+ * atom in ctxt->atom or, when the pending atom is a XML_REGEXP_RANGES
+ * atom, is added to it as a range. Malformed escapes raise a compile
+ * error.
+ */
+static void
+xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
+    int cur;
+
+    if (CUR == '.') {
+        if (ctxt->atom == NULL) {
+            ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR);
+        } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
+            xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
+                               XML_REGEXP_ANYCHAR, 0, 0, NULL);
+        }
+        NEXT;
+        return;
+    }
+    if (CUR != '\\') {
+        REGEXP_ERROR("Escaped sequence: expecting \\");
+        return;
+    }
+    NEXT;
+    cur = CUR;
+    if ((cur == 'p') || (cur == 'P')) {
+        NEXT;
+        if (CUR != '{') {
+            REGEXP_ERROR("Expecting '{'");
+            return;
+        }
+        NEXT;
+        xmlFAParseCharProp(ctxt);
+        /*
+         * xmlFAParseCharProp() leaves ctxt->atom NULL when the property
+         * is unknown or the atom could not be allocated, so the
+         * complement flag may only be set on an existing atom.
+         */
+        if ((cur == 'P') && (ctxt->atom != NULL))
+            ctxt->atom->neg = 1;
+        if (CUR != '}') {
+            REGEXP_ERROR("Expecting '}'");
+            return;
+        }
+        NEXT;
+    } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') ||
+               (cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||
+               (cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') ||
+               (cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||
+               (cur == 0x5E)) {
+        /* SingleCharEsc: \n, \r and \t map to control characters,
+         * everything else stands for itself */
+        int value = cur;
+
+        switch (cur) {
+            case 'n':
+                value = '\n';
+                break;
+            case 'r':
+                value = '\r';
+                break;
+            case 't':
+                value = '\t';
+                break;
+        }
+        if (ctxt->atom == NULL) {
+            ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
+            if (ctxt->atom != NULL)
+                ctxt->atom->codepoint = value;
+        } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
+            xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
+                               XML_REGEXP_CHARVAL, value, value, NULL);
+        }
+        NEXT;
+    } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') ||
+               (cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') ||
+               (cur == 'w') || (cur == 'W')) {
+        /* MultiCharEsc: each letter selects a predefined class */
+        xmlRegAtomType type = XML_REGEXP_ANYSPACE;
+
+        switch (cur) {
+            case 's':
+                type = XML_REGEXP_ANYSPACE;
+                break;
+            case 'S':
+                type = XML_REGEXP_NOTSPACE;
+                break;
+            case 'i':
+                type = XML_REGEXP_INITNAME;
+                break;
+            case 'I':
+                type = XML_REGEXP_NOTINITNAME;
+                break;
+            case 'c':
+                type = XML_REGEXP_NAMECHAR;
+                break;
+            case 'C':
+                type = XML_REGEXP_NOTNAMECHAR;
+                break;
+            case 'd':
+                type = XML_REGEXP_DECIMAL;
+                break;
+            case 'D':
+                type = XML_REGEXP_NOTDECIMAL;
+                break;
+            case 'w':
+                type = XML_REGEXP_REALCHAR;
+                break;
+            case 'W':
+                type = XML_REGEXP_NOTREALCHAR;
+                break;
+        }
+        NEXT;
+        if (ctxt->atom == NULL) {
+            ctxt->atom = xmlRegNewAtom(ctxt, type);
+        } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
+            xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
+                               type, 0, 0, NULL);
+        }
+    } else {
+        REGEXP_ERROR("Wrong escape sequence, misuse of character '\\'");
+    }
+}
+
+/**
+ * xmlFAParseCharRange:
+ * @ctxt: a regexp parser context
+ *
+ * [17] charRange ::= seRange | XmlCharRef | XmlCharIncDash
+ * [18] seRange ::= charOrEsc '-' charOrEsc
+ * [20] charOrEsc ::= XmlChar | SingleCharEsc
+ * [21] XmlChar ::= [^\#x2D#x5B#x5D]
+ * [22] XmlCharIncDash ::= [^\#x5B#x5D]
+ *
+ * Parses a single character or a "start-end" range inside a character
+ * class and adds it to the pending atom (ctxt->atom). A compile error
+ * is raised on an invalid escape, an unexpected bracket, a pattern that
+ * ends in the middle of the range, or a range whose end precedes its
+ * start.
+ */
+static void
+xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
+    int cur, len;
+    int start = -1;
+    int end = -1;
+
+    if (CUR == '\0') {
+        REGEXP_ERROR("Expecting ']'");
+        return;
+    }
+
+    cur = CUR;
+    if (cur == '\\') {
+        NEXT;
+        cur = CUR;
+        switch (cur) {
+            case 'n': start = 0xA; break;
+            case 'r': start = 0xD; break;
+            case 't': start = 0x9; break;
+            case '\\': case '|': case '.': case '-': case '^': case '?':
+            case '*': case '+': case '{': case '}': case '(': case ')':
+            case '[': case ']':
+                start = cur; break;
+            default:
+                REGEXP_ERROR("Invalid escape value");
+                return;
+        }
+        end = start;
+        len = 1;
+    } else if ((cur != 0x5B) && (cur != 0x5D)) {
+        end = start = CUR_SCHAR(ctxt->cur, len);
+    } else {
+        REGEXP_ERROR("Expecting a char range");
+        return;
+    }
+    /*
+     * Since we are "inside" a range, we can assume ctxt->cur is past
+     * the start of ctxt->string, and PREV should be safe
+     */
+    if ((start == '-') && (NXT(1) != ']') && (PREV != '[') && (PREV != '^')) {
+        NEXTL(len);
+        return;
+    }
+    NEXTL(len);
+    cur = CUR;
+    if ((cur != '-') || (NXT(1) == ']')) {
+        /* single character, or a literal '-' closing the class */
+        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
+                           XML_REGEXP_CHARVAL, start, end, NULL);
+        return;
+    }
+    NEXT;
+    cur = CUR;
+    if (cur == '\\') {
+        NEXT;
+        cur = CUR;
+        switch (cur) {
+            case 'n': end = 0xA; break;
+            case 'r': end = 0xD; break;
+            case 't': end = 0x9; break;
+            case '\\': case '|': case '.': case '-': case '^': case '?':
+            case '*': case '+': case '{': case '}': case '(': case ')':
+            case '[': case ']':
+                end = cur; break;
+            default:
+                REGEXP_ERROR("Invalid escape value");
+                return;
+        }
+        len = 1;
+    } else if ((cur != '\0') && (cur != 0x5B) && (cur != 0x5D)) {
+        /*
+         * The end of the pattern is rejected here, so that the NEXTL()
+         * below never advances past the terminating NUL.
+         */
+        end = CUR_SCHAR(ctxt->cur, len);
+    } else {
+        REGEXP_ERROR("Expecting the end of a char range");
+        return;
+    }
+    NEXTL(len);
+    /* TODO check that the values are acceptable character ranges for XML */
+    if (end < start) {
+        REGEXP_ERROR("End of range is before start of range");
+    } else {
+        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
+                           XML_REGEXP_CHARVAL, start, end, NULL);
+    }
+    return;
+}
+
+/**
+ * xmlFAParsePosCharGroup:
+ * @ctxt: a regexp parser context
+ *
+ * [14] posCharGroup ::= ( charRange | charClassEsc )+
+ *
+ * Parses at least one item, then keeps going until the next character
+ * is ']', '^', '-' or the end of the pattern, or until an error has
+ * been flagged on @ctxt.
+ */
+static void
+xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
+    for (;;) {
+        if (CUR == '\\')
+            xmlFAParseCharClassEsc(ctxt);
+        else
+            xmlFAParseCharRange(ctxt);
+
+        /* characters that end a positive group */
+        if ((CUR == ']') || (CUR == '^') || (CUR == '-') || (CUR == 0))
+            break;
+        if (ctxt->error != 0)
+            break;
+    }
+}
+
+/**
+ * xmlFAParseCharGroup:
+ * @ctxt: a regexp parser context
+ *
+ * [13] charGroup ::= posCharGroup | negCharGroup | charClassSub
+ * [15] negCharGroup ::= '^' posCharGroup
+ * [16] charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr
+ * [12] charClassExpr ::= '[' charGroup ']'
+ *
+ * Parses the content of a bracket expression up to, but not including,
+ * the closing ']'. ctxt->neg is toggled while a '^' group is parsed,
+ * set to 2 while a subtracted "-[...]" class is parsed, and restored to
+ * its entry value before returning.
+ */
+static void
+xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) {
+    int entryNeg = ctxt->neg;
+
+    while ((CUR != ']') && (ctxt->error == 0)) {
+        int savedNeg = ctxt->neg;
+
+        if (CUR == '^') {
+            /* negated group: flip the polarity for its duration */
+            NEXT;
+            ctxt->neg = !ctxt->neg;
+            xmlFAParsePosCharGroup(ctxt);
+            ctxt->neg = savedNeg;
+            continue;
+        }
+        if ((CUR == '-') && (NXT(1) == '[')) {
+            /* class subtraction: always the last part of the group */
+            ctxt->neg = 2;
+            NEXT; /* eat the '-' */
+            NEXT; /* eat the '[' */
+            xmlFAParseCharGroup(ctxt);
+            if (CUR != ']') {
+                REGEXP_ERROR("charClassExpr: ']' expected");
+                break;
+            }
+            NEXT;
+            ctxt->neg = savedNeg;
+            break;
+        }
+        /* the loop condition guarantees CUR is not ']' here */
+        xmlFAParsePosCharGroup(ctxt);
+    }
+    ctxt->neg = entryNeg;
+}
+
+/**
+ * xmlFAParseCharClass:
+ * @ctxt: a regexp parser context
+ *
+ * [11] charClass ::= charClassEsc | charClassExpr
+ * [12] charClassExpr ::= '[' charGroup ']'
+ *
+ * A leading '[' starts a bracket expression collected into a new
+ * XML_REGEXP_RANGES atom; anything else is handed to
+ * xmlFAParseCharClassEsc().
+ */
+static void
+xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) {
+    if (CUR != '[') {
+        xmlFAParseCharClassEsc(ctxt);
+        return;
+    }
+
+    NEXT;
+    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES);
+    if (ctxt->atom == NULL)
+        return;
+    xmlFAParseCharGroup(ctxt);
+    if (CUR != ']') {
+        REGEXP_ERROR("xmlFAParseCharClass: ']' expected");
+        return;
+    }
+    NEXT;
+}
+
+/**
+ * xmlFAParseQuantExact:
+ * @ctxt: a regexp parser context
+ *
+ * [8] QuantExact ::= [0-9]+
+ *
+ * Consumes a run of decimal digits. A value that does not fit in an int
+ * raises a compile error instead of overflowing.
+ *
+ * Returns the parsed non-negative value, or -1 if no digit was found or
+ * the value is out of range
+ */
+static int
+xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) {
+    int ret = 0;
+    int ok = 0;
+    int overflow = 0;
+
+    while ((CUR >= '0') && (CUR <= '9')) {
+        int digit = CUR - '0';
+
+        /* check before multiplying: signed overflow is undefined */
+        if ((ret > INT_MAX / 10) ||
+            ((ret == INT_MAX / 10) && (digit > INT_MAX % 10))) {
+            overflow = 1;
+        } else {
+            ret = ret * 10 + digit;
+        }
+        ok = 1;
+        NEXT;
+    }
+    if (ok != 1) {
+        return(-1);
+    }
+    if (overflow) {
+        REGEXP_ERROR("Quantifier value out of range");
+        return(-1);
+    }
+    return(ret);
+}
+
+/**
+ * xmlFAParseQuantifier:
+ * @ctxt: a regexp parser context
+ *
+ * [4] quantifier ::= [?*+] | ( '{' quantity '}' )
+ * [5] quantity ::= quantRange | quantMin | QuantExact
+ * [6] quantRange ::= QuantExact ',' QuantExact
+ * [7] quantMin ::= QuantExact ','
+ * [8] QuantExact ::= [0-9]+
+ *
+ * Parses an optional quantifier and stores it on the pending atom
+ * (ctxt->atom) if there is one. A '{...}' quantity must start with a
+ * number; "{n,}" means "at least n" and is stored with max == INT_MAX.
+ *
+ * Returns 1 if a quantifier was consumed, 0 if none is present.
+ */
+static int
+xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) {
+    int cur;
+
+    cur = CUR;
+    if ((cur == '?') || (cur == '*') || (cur == '+')) {
+        if (ctxt->atom != NULL) {
+            if (cur == '?')
+                ctxt->atom->quant = XML_REGEXP_QUANT_OPT;
+            else if (cur == '*')
+                ctxt->atom->quant = XML_REGEXP_QUANT_MULT;
+            else if (cur == '+')
+                ctxt->atom->quant = XML_REGEXP_QUANT_PLUS;
+        }
+        NEXT;
+        return(1);
+    }
+    if (cur == '{') {
+        int min = 0, max = 0;
+
+        NEXT;
+        cur = xmlFAParseQuantExact(ctxt);
+        if (cur >= 0)
+            min = cur;
+        else {
+            /* the grammar has no form without a leading QuantExact */
+            REGEXP_ERROR("Improper quantifier");
+        }
+        if (CUR == ',') {
+            NEXT;
+            if (CUR == '}')
+                max = INT_MAX;
+            else {
+                cur = xmlFAParseQuantExact(ctxt);
+                if (cur >= 0)
+                    max = cur;
+                else {
+                    REGEXP_ERROR("Improper quantifier");
+                }
+            }
+        }
+        if (CUR == '}') {
+            NEXT;
+        } else {
+            REGEXP_ERROR("Unterminated quantifier");
+        }
+        /* a max of 0 is treated as "same as min" (exact count) */
+        if (max == 0)
+            max = min;
+        if (ctxt->atom != NULL) {
+            ctxt->atom->quant = XML_REGEXP_QUANT_RANGE;
+            ctxt->atom->min = min;
+            ctxt->atom->max = max;
+        }
+        return(1);
+    }
+    return(0);
+}
+
+/**
+ * xmlFAParseAtom:
+ * @ctxt: a regexp parser context
+ *
+ * [9] atom ::= Char | charClass | ( '(' regExp ')' )
+ *
+ * Parses one atom and leaves it in ctxt->atom.
+ *
+ * Returns 1 if an atom was parsed, 0 if the current character cannot
+ * start an atom, -1 if the atom could not be allocated.
+ */
+static int
+xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
+    xmlRegStatePtr groupStart, groupStart0, savedEnd;
+    int codepoint, len;
+
+    /* plain character */
+    codepoint = xmlFAIsChar(ctxt);
+    if (codepoint > 0) {
+        ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
+        if (ctxt->atom == NULL)
+            return(-1);
+        codepoint = CUR_SCHAR(ctxt->cur, len);
+        ctxt->atom->codepoint = codepoint;
+        NEXTL(len);
+        return(1);
+    }
+
+    switch (CUR) {
+        case '[':
+        case '\\':
+        case '.':
+            xmlFAParseCharClass(ctxt);
+            return(1);
+        case '(':
+            break;
+        default:
+            /* '|', ')', end of pattern or anything else: no atom here */
+            return(0);
+    }
+
+    /* parenthesised sub-expression */
+    NEXT;
+    /*
+     * this extra Epsilon transition is needed if we count with 0 allowed
+     * unfortunately this can't be known at that point
+     */
+    xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
+    groupStart0 = ctxt->state;
+    xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
+    groupStart = ctxt->state;
+    savedEnd = ctxt->end;
+    ctxt->end = NULL;
+    ctxt->atom = NULL;
+    xmlFAParseRegExp(ctxt, 0);
+    if (CUR != ')') {
+        REGEXP_ERROR("xmlFAParseAtom: expecting ')'");
+    } else {
+        NEXT;
+    }
+    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG);
+    if (ctxt->atom == NULL)
+        return(-1);
+    ctxt->atom->start = groupStart;
+    ctxt->atom->start0 = groupStart0;
+    ctxt->atom->stop = ctxt->state;
+    ctxt->end = savedEnd;
+    return(1);
+}
+
+/**
+ * xmlFAParsePiece:
+ * @ctxt: a regexp parser context
+ *
+ * [3] piece ::= atom quantifier?
+ *
+ * Resets ctxt->atom, parses one atom and its optional quantifier.
+ *
+ * Returns 0 if no atom starts at the current position, 1 otherwise
+ * (a compile error is flagged if no atom object was produced).
+ */
+static int
+xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) {
+    ctxt->atom = NULL;
+    if (xmlFAParseAtom(ctxt) == 0)
+        return(0);
+
+    if (ctxt->atom == NULL) {
+        REGEXP_ERROR("internal: no atom generated");
+    }
+    xmlFAParseQuantifier(ctxt);
+    return(1);
+}
+
+/**
+ * xmlFAParseBranch:
+ * @ctxt: a regexp parser context
+ * @to: optional target to the end of the branch
+ *
+ * @to is used to optimize by removing duplicate path in automata
+ * in expressions like (a|b)(c|d)
+ *
+ * [2] branch ::= piece*
+ *
+ * Parses pieces one after another, generating the transitions for each
+ * from the state reached by the previous one. A piece directly followed
+ * by '|' or ')' is the last of the branch and is wired to @to.
+ *
+ * Returns 0 when the branch has been parsed, -1 if the transitions for
+ * a piece could not be generated.
+ */
+static int
+xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) {
+    xmlRegStatePtr from = ctxt->state;
+    int found;
+
+    do {
+        found = xmlFAParsePiece(ctxt);
+        if (found != 0) {
+            xmlRegStatePtr target;
+
+            target = ((CUR == '|') || (CUR == ')')) ? to : NULL;
+            if (xmlFAGenerateTransitions(ctxt, from, target,
+                                         ctxt->atom) < 0)
+                return(-1);
+            from = ctxt->state;
+            ctxt->atom = NULL;
+        }
+    } while ((found != 0) && (ctxt->error == 0));
+
+    return(0);
+}
+
+/**
+ * xmlFAParseRegExp:
+ * @ctxt: a regexp parser context
+ * @top: is this the top-level expression ?
+ *
+ * [1] regExp ::= branch ( '|' branch )*
+ *
+ * Every alternative starts from the state that was current on entry;
+ * alternatives after the first are directed to the state reached by
+ * the first one. At top level that state is marked final.
+ */
+static void
+xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
+    xmlRegStatePtr origin, join;
+
+    /* if not top start should have been generated by an epsilon trans */
+    origin = ctxt->state;
+    ctxt->end = NULL;
+    xmlFAParseBranch(ctxt, NULL);
+    if (top) {
+#ifdef DEBUG_REGEXP_GRAPH
+        printf("State %d is final\n", ctxt->state->no);
+#endif
+        ctxt->state->type = XML_REGEXP_FINAL_STATE;
+    }
+
+    join = ctxt->state;
+    if (CUR != '|') {
+        /* single branch: nothing to merge */
+        ctxt->end = join;
+        return;
+    }
+
+    while ((CUR == '|') && (ctxt->error == 0)) {
+        NEXT;
+        ctxt->state = origin;
+        ctxt->end = NULL;
+        xmlFAParseBranch(ctxt, join);
+    }
+    if (!top) {
+        ctxt->state = join;
+        ctxt->end = join;
+    }
+}
+
+/************************************************************************
+ * *
+ * The basic API *
+ * *
+ ************************************************************************/
+
+/**
+ * xmlRegexpCompile:
+ * @regexp: a regular expression string
+ *
+ * Parses a regular expression conforming to XML Schemas Part 2 Datatype
+ * Appendix F and builds an automata suitable for testing strings against
+ * that regular expression
+ *
+ * Returns the compiled expression or NULL in case of error (syntax
+ * error in @regexp or failure to allocate the parser state)
+ */
+xmlRegexpPtr
+xmlRegexpCompile(const xmlChar *regexp) {
+    xmlRegexpPtr ret;
+    xmlRegParserCtxtPtr ctxt;
+
+    ctxt = xmlRegNewParserCtxt(regexp);
+    if (ctxt == NULL)
+        return(NULL);
+
+    /*
+     * initialize the parser; the start state is dereferenced by the
+     * parsing routines, so give up right away if it cannot be created
+     * or registered (same handling as in xmlNewAutomata)
+     */
+    ctxt->end = NULL;
+    ctxt->start = ctxt->state = xmlRegNewState(ctxt);
+    if (ctxt->start == NULL) {
+        xmlRegFreeParserCtxt(ctxt);
+        return(NULL);
+    }
+    if (xmlRegStatePush(ctxt, ctxt->start) < 0) {
+        xmlRegFreeState(ctxt->start);
+        xmlRegFreeParserCtxt(ctxt);
+        return(NULL);
+    }
+
+    /* parse the expression building an automata */
+    xmlFAParseRegExp(ctxt, 1);
+    if (CUR != 0) {
+        REGEXP_ERROR("xmlFAParseRegExp: extra characters");
+    }
+    if (ctxt->error != 0) {
+        xmlRegFreeParserCtxt(ctxt);
+        return(NULL);
+    }
+    ctxt->end = ctxt->state;
+    ctxt->start->type = XML_REGEXP_START_STATE;
+    ctxt->end->type = XML_REGEXP_FINAL_STATE;
+
+    /* remove the Epsilon except for counted transitions */
+    xmlFAEliminateEpsilonTransitions(ctxt);
+
+
+    if (ctxt->error != 0) {
+        xmlRegFreeParserCtxt(ctxt);
+        return(NULL);
+    }
+    ret = xmlRegEpxFromParse(ctxt);
+    xmlRegFreeParserCtxt(ctxt);
+    return(ret);
+}
+
+/**
+ * xmlRegexpExec:
+ * @comp: the compiled regular expression
+ * @content: the value to check against the regular expression
+ *
+ * Check if the regular expression generates the value
+ *
+ * Returns 1 if it matches, 0 if not and a negative value in case of error
+ * (-1 when either argument is NULL)
+ */
+int
+xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) {
+    if ((comp != NULL) && (content != NULL))
+        return(xmlFARegExec(comp, content));
+    return(-1);
+}
+
+/**
+ * xmlRegexpIsDeterminist:
+ * @comp: the compiled regular expression
+ *
+ * Check if the regular expression is determinist. The answer is stored
+ * in @comp so that later calls return it without recomputing.
+ *
+ * Returns 1 if it yes, 0 if not and a negative value in case of error
+ * (@comp is NULL or the temporary automata could not be allocated)
+ */
+int
+xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
+    xmlAutomataPtr am;
+    int ret;
+
+    if (comp == NULL)
+        return(-1);
+    if (comp->determinist != -1)
+        return(comp->determinist);
+
+    am = xmlNewAutomata();
+    if (am == NULL)
+        return(-1);
+    /* drop the states created by xmlNewAutomata(), comp's are used instead */
+    if (am->states != NULL) {
+        int i;
+
+        for (i = 0;i < am->nbStates;i++)
+            xmlRegFreeState(am->states[i]);
+        REGEXP_FREE(am->states);
+    }
+    am->nbAtoms = comp->nbAtoms;
+    am->atoms = comp->atoms;
+    am->nbStates = comp->nbStates;
+    am->states = comp->states;
+    am->determinist = -1;
+    am->flags = comp->flags;
+    ret = xmlFAComputesDeterminism(am);
+    /* the arrays still belong to comp: detach them before freeing am */
+    am->atoms = NULL;
+    am->states = NULL;
+    xmlFreeAutomata(am);
+    comp->determinist = ret;
+    return(ret);
+}
+
+/**
+ * xmlRegFreeRegexp:
+ * @regexp: the regexp
+ *
+ * Free a regexp together with the string, states, atoms, counters,
+ * compact table, transition data and string map it holds. A NULL
+ * @regexp is ignored.
+ */
+void
+xmlRegFreeRegexp(xmlRegexpPtr regexp) {
+    int idx;
+
+    if (regexp == NULL)
+        return;
+
+    if (regexp->string != NULL) {
+        REGEXP_FREE(regexp->string);
+    }
+
+    if (regexp->states != NULL) {
+        idx = 0;
+        while (idx < regexp->nbStates) {
+            xmlRegFreeState(regexp->states[idx]);
+            idx++;
+        }
+        REGEXP_FREE(regexp->states);
+    }
+
+    if (regexp->atoms != NULL) {
+        idx = 0;
+        while (idx < regexp->nbAtoms) {
+            xmlRegFreeAtom(regexp->atoms[idx]);
+            idx++;
+        }
+        REGEXP_FREE(regexp->atoms);
+    }
+
+    if (regexp->counters != NULL) {
+        REGEXP_FREE(regexp->counters);
+    }
+    if (regexp->compact != NULL) {
+        REGEXP_FREE(regexp->compact);
+    }
+    if (regexp->transdata != NULL) {
+        REGEXP_FREE(regexp->transdata);
+    }
+
+    if (regexp->stringMap != NULL) {
+        idx = 0;
+        while (idx < regexp->nbstrings) {
+            REGEXP_FREE(regexp->stringMap[idx]);
+            idx++;
+        }
+        REGEXP_FREE(regexp->stringMap);
+    }
+
+    REGEXP_FREE(regexp);
+}
+
+/************************************************************************
+ * *
+ * The Automata interface *
+ * *
+ ************************************************************************/
+
+/**
+ * xmlNewAutomata:
+ *
+ * Create a new automata: a parser context without an expression string,
+ * holding a single registered start state and with its flags cleared.
+ *
+ * Returns the new object or NULL in case of failure
+ */
+static xmlAutomataPtr
+xmlNewAutomata(void) {
+    xmlAutomataPtr am;
+
+    am = xmlRegNewParserCtxt(NULL);
+    if (am == NULL)
+        return(NULL);
+
+    /* initialize the parser */
+    am->end = NULL;
+    am->state = xmlRegNewState(am);
+    am->start = am->state;
+    if (am->start == NULL) {
+        xmlFreeAutomata(am);
+        return(NULL);
+    }
+    am->start->type = XML_REGEXP_START_STATE;
+
+    if (xmlRegStatePush(am, am->start) < 0) {
+        /* the state was not registered, so it is released separately */
+        xmlRegFreeState(am->start);
+        xmlFreeAutomata(am);
+        return(NULL);
+    }
+    am->flags = 0;
+
+    return(am);
+}
+
+/**
+ * xmlFreeAutomata:
+ * @am: an automata
+ *
+ * Free an automata by releasing its parser context. A NULL @am is
+ * ignored.
+ */
+static void
+xmlFreeAutomata(xmlAutomataPtr am) {
+    if (am != NULL)
+        xmlRegFreeParserCtxt(am);
+}
+
+#include <xsde/c/post.h>
diff --git a/libxsde/xsde/c/regexp/xmlregexp.h b/libxsde/xsde/c/regexp/xmlregexp.h
new file mode 100644
index 0000000..948e3ca
--- /dev/null
+++ b/libxsde/xsde/c/regexp/xmlregexp.h
@@ -0,0 +1,39 @@
+/*
+ * Summary: regular expressions handling
+ * Description: basic API for libxml regular expressions handling used
+ * for XML Schemas and validation.
+ *
+ * See COPYING for the status of this software.
+ */
+
+#ifndef __XML_REGEXP_H__
+#define __XML_REGEXP_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * xmlRegexpPtr:
+ *
+ * A libxml regular expression, they can actually be far more complex
+ * than the POSIX regex expressions.
+ *
+ * The structure is opaque to users of this header: only the pointer
+ * type is exposed.
+ */
+typedef struct _xmlRegexp xmlRegexp;
+typedef xmlRegexp *xmlRegexpPtr;
+
+typedef unsigned char xmlChar;
+
+/*
+ * The POSIX like API
+ *
+ * xmlRegexpCompile: compiles an XML Schema regular expression; returns
+ * the compiled expression or NULL in case of error.
+ * xmlRegFreeRegexp: frees a compiled expression (NULL is ignored).
+ * xmlRegexpExec: returns 1 if value matches, 0 if not and a negative
+ * value in case of error.
+ * xmlRegexpIsDeterminist: returns 1 if the expression is determinist,
+ * 0 if not and a negative value in case of error.
+ */
+xmlRegexpPtr xmlRegexpCompile (const xmlChar *regexp);
+void xmlRegFreeRegexp (xmlRegexpPtr regexp);
+int xmlRegexpExec (xmlRegexpPtr comp, const xmlChar *value);
+int xmlRegexpIsDeterminist (xmlRegexpPtr comp);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /*__XML_REGEXP_H__ */
diff --git a/libxsde/xsde/c/regexp/xmlunicode.c b/libxsde/xsde/c/regexp/xmlunicode.c
new file mode 100644
index 0000000..848c148
--- /dev/null
+++ b/libxsde/xsde/c/regexp/xmlunicode.c
@@ -0,0 +1,3172 @@
+/*
+ * xmlunicode.c: this module implements the Unicode character APIs
+ *
+ * This file is automatically generated from the
+ * UCS description files of the Unicode Character Database
+ * http://www.unicode.org/Public/4.0-Update1/UCD-4.0.1.html
+ * using the genUnicode.py Python script.
+ *
+ * Generation date: Mon Mar 27 11:09:52 2006
+ * Sources: Blocks-4.0.1.txt UnicodeData-4.0.1.txt
+ */
+#include <xsde/c/pre.h>
+
+#include <string.h>
+
+#include "xmlunicode.h"
+#include "chvalid.h"
+
+typedef int (xmlIntFunc)(int); /* just to keep one's mind untwisted */
+
+typedef struct {
+ const char *rangename; /* name of the entry, e.g. "Arabic" or "Lu" */
+ xmlIntFunc *func; /* xmlUCSIs* function listed for that name */
+} xmlUnicodeRange;
+
+typedef struct {
+ xmlUnicodeRange *table; /* array of name/function entries */
+ int numentries; /* presumably the entry count of table -- confirm */
+} xmlUnicodeNameTable;
+
+
+static xmlIntFunc *xmlUnicodeLookup(xmlUnicodeNameTable *tptr, const char *tname);
+
+static xmlUnicodeRange xmlUnicodeBlocks[] = { /* block names, in ascending byte order */
+ {"AegeanNumbers", xmlUCSIsAegeanNumbers},
+ {"AlphabeticPresentationForms", xmlUCSIsAlphabeticPresentationForms},
+ {"Arabic", xmlUCSIsArabic},
+ {"ArabicPresentationForms-A", xmlUCSIsArabicPresentationFormsA},
+ {"ArabicPresentationForms-B", xmlUCSIsArabicPresentationFormsB},
+ {"Armenian", xmlUCSIsArmenian},
+ {"Arrows", xmlUCSIsArrows},
+ {"BasicLatin", xmlUCSIsBasicLatin},
+ {"Bengali", xmlUCSIsBengali},
+ {"BlockElements", xmlUCSIsBlockElements},
+ {"Bopomofo", xmlUCSIsBopomofo},
+ {"BopomofoExtended", xmlUCSIsBopomofoExtended},
+ {"BoxDrawing", xmlUCSIsBoxDrawing},
+ {"BraillePatterns", xmlUCSIsBraillePatterns},
+ {"Buhid", xmlUCSIsBuhid},
+ {"ByzantineMusicalSymbols", xmlUCSIsByzantineMusicalSymbols},
+ {"CJKCompatibility", xmlUCSIsCJKCompatibility},
+ {"CJKCompatibilityForms", xmlUCSIsCJKCompatibilityForms},
+ {"CJKCompatibilityIdeographs", xmlUCSIsCJKCompatibilityIdeographs},
+ {"CJKCompatibilityIdeographsSupplement", xmlUCSIsCJKCompatibilityIdeographsSupplement},
+ {"CJKRadicalsSupplement", xmlUCSIsCJKRadicalsSupplement},
+ {"CJKSymbolsandPunctuation", xmlUCSIsCJKSymbolsandPunctuation},
+ {"CJKUnifiedIdeographs", xmlUCSIsCJKUnifiedIdeographs},
+ {"CJKUnifiedIdeographsExtensionA", xmlUCSIsCJKUnifiedIdeographsExtensionA},
+ {"CJKUnifiedIdeographsExtensionB", xmlUCSIsCJKUnifiedIdeographsExtensionB},
+ {"Cherokee", xmlUCSIsCherokee},
+ {"CombiningDiacriticalMarks", xmlUCSIsCombiningDiacriticalMarks},
+ {"CombiningDiacriticalMarksforSymbols", xmlUCSIsCombiningDiacriticalMarksforSymbols},
+ {"CombiningHalfMarks", xmlUCSIsCombiningHalfMarks},
+ {"CombiningMarksforSymbols", xmlUCSIsCombiningMarksforSymbols},
+ {"ControlPictures", xmlUCSIsControlPictures},
+ {"CurrencySymbols", xmlUCSIsCurrencySymbols},
+ {"CypriotSyllabary", xmlUCSIsCypriotSyllabary},
+ {"Cyrillic", xmlUCSIsCyrillic},
+ {"CyrillicSupplement", xmlUCSIsCyrillicSupplement},
+ {"Deseret", xmlUCSIsDeseret},
+ {"Devanagari", xmlUCSIsDevanagari},
+ {"Dingbats", xmlUCSIsDingbats},
+ {"EnclosedAlphanumerics", xmlUCSIsEnclosedAlphanumerics},
+ {"EnclosedCJKLettersandMonths", xmlUCSIsEnclosedCJKLettersandMonths},
+ {"Ethiopic", xmlUCSIsEthiopic},
+ {"GeneralPunctuation", xmlUCSIsGeneralPunctuation},
+ {"GeometricShapes", xmlUCSIsGeometricShapes},
+ {"Georgian", xmlUCSIsGeorgian},
+ {"Gothic", xmlUCSIsGothic},
+ {"Greek", xmlUCSIsGreek},
+ {"GreekExtended", xmlUCSIsGreekExtended},
+ {"GreekandCoptic", xmlUCSIsGreekandCoptic},
+ {"Gujarati", xmlUCSIsGujarati},
+ {"Gurmukhi", xmlUCSIsGurmukhi},
+ {"HalfwidthandFullwidthForms", xmlUCSIsHalfwidthandFullwidthForms},
+ {"HangulCompatibilityJamo", xmlUCSIsHangulCompatibilityJamo},
+ {"HangulJamo", xmlUCSIsHangulJamo},
+ {"HangulSyllables", xmlUCSIsHangulSyllables},
+ {"Hanunoo", xmlUCSIsHanunoo},
+ {"Hebrew", xmlUCSIsHebrew},
+ {"HighPrivateUseSurrogates", xmlUCSIsHighPrivateUseSurrogates},
+ {"HighSurrogates", xmlUCSIsHighSurrogates},
+ {"Hiragana", xmlUCSIsHiragana},
+ {"IPAExtensions", xmlUCSIsIPAExtensions},
+ {"IdeographicDescriptionCharacters", xmlUCSIsIdeographicDescriptionCharacters},
+ {"Kanbun", xmlUCSIsKanbun},
+ {"KangxiRadicals", xmlUCSIsKangxiRadicals},
+ {"Kannada", xmlUCSIsKannada},
+ {"Katakana", xmlUCSIsKatakana},
+ {"KatakanaPhoneticExtensions", xmlUCSIsKatakanaPhoneticExtensions},
+ {"Khmer", xmlUCSIsKhmer},
+ {"KhmerSymbols", xmlUCSIsKhmerSymbols},
+ {"Lao", xmlUCSIsLao},
+ {"Latin-1Supplement", xmlUCSIsLatin1Supplement},
+ {"LatinExtended-A", xmlUCSIsLatinExtendedA},
+ {"LatinExtended-B", xmlUCSIsLatinExtendedB},
+ {"LatinExtendedAdditional", xmlUCSIsLatinExtendedAdditional},
+ {"LetterlikeSymbols", xmlUCSIsLetterlikeSymbols},
+ {"Limbu", xmlUCSIsLimbu},
+ {"LinearBIdeograms", xmlUCSIsLinearBIdeograms},
+ {"LinearBSyllabary", xmlUCSIsLinearBSyllabary},
+ {"LowSurrogates", xmlUCSIsLowSurrogates},
+ {"Malayalam", xmlUCSIsMalayalam},
+ {"MathematicalAlphanumericSymbols", xmlUCSIsMathematicalAlphanumericSymbols},
+ {"MathematicalOperators", xmlUCSIsMathematicalOperators},
+ {"MiscellaneousMathematicalSymbols-A", xmlUCSIsMiscellaneousMathematicalSymbolsA},
+ {"MiscellaneousMathematicalSymbols-B", xmlUCSIsMiscellaneousMathematicalSymbolsB},
+ {"MiscellaneousSymbols", xmlUCSIsMiscellaneousSymbols},
+ {"MiscellaneousSymbolsandArrows", xmlUCSIsMiscellaneousSymbolsandArrows},
+ {"MiscellaneousTechnical", xmlUCSIsMiscellaneousTechnical},
+ {"Mongolian", xmlUCSIsMongolian},
+ {"MusicalSymbols", xmlUCSIsMusicalSymbols},
+ {"Myanmar", xmlUCSIsMyanmar},
+ {"NumberForms", xmlUCSIsNumberForms},
+ {"Ogham", xmlUCSIsOgham},
+ {"OldItalic", xmlUCSIsOldItalic},
+ {"OpticalCharacterRecognition", xmlUCSIsOpticalCharacterRecognition},
+ {"Oriya", xmlUCSIsOriya},
+ {"Osmanya", xmlUCSIsOsmanya},
+ {"PhoneticExtensions", xmlUCSIsPhoneticExtensions},
+ {"PrivateUse", xmlUCSIsPrivateUse},
+ {"PrivateUseArea", xmlUCSIsPrivateUseArea},
+ {"Runic", xmlUCSIsRunic},
+ {"Shavian", xmlUCSIsShavian},
+ {"Sinhala", xmlUCSIsSinhala},
+ {"SmallFormVariants", xmlUCSIsSmallFormVariants},
+ {"SpacingModifierLetters", xmlUCSIsSpacingModifierLetters},
+ {"Specials", xmlUCSIsSpecials},
+ {"SuperscriptsandSubscripts", xmlUCSIsSuperscriptsandSubscripts},
+ {"SupplementalArrows-A", xmlUCSIsSupplementalArrowsA},
+ {"SupplementalArrows-B", xmlUCSIsSupplementalArrowsB},
+ {"SupplementalMathematicalOperators", xmlUCSIsSupplementalMathematicalOperators},
+ {"SupplementaryPrivateUseArea-A", xmlUCSIsSupplementaryPrivateUseAreaA},
+ {"SupplementaryPrivateUseArea-B", xmlUCSIsSupplementaryPrivateUseAreaB},
+ {"Syriac", xmlUCSIsSyriac},
+ {"Tagalog", xmlUCSIsTagalog},
+ {"Tagbanwa", xmlUCSIsTagbanwa},
+ {"Tags", xmlUCSIsTags},
+ {"TaiLe", xmlUCSIsTaiLe},
+ {"TaiXuanJingSymbols", xmlUCSIsTaiXuanJingSymbols},
+ {"Tamil", xmlUCSIsTamil},
+ {"Telugu", xmlUCSIsTelugu},
+ {"Thaana", xmlUCSIsThaana},
+ {"Thai", xmlUCSIsThai},
+ {"Tibetan", xmlUCSIsTibetan},
+ {"Ugaritic", xmlUCSIsUgaritic},
+ {"UnifiedCanadianAboriginalSyllabics", xmlUCSIsUnifiedCanadianAboriginalSyllabics},
+ {"VariationSelectors", xmlUCSIsVariationSelectors},
+ {"VariationSelectorsSupplement", xmlUCSIsVariationSelectorsSupplement},
+ {"YiRadicals", xmlUCSIsYiRadicals},
+ {"YiSyllables", xmlUCSIsYiSyllables},
+ {"YijingHexagramSymbols", xmlUCSIsYijingHexagramSymbols}};
+
+static xmlUnicodeRange xmlUnicodeCats[] = { /* category names, in ascending byte order */
+ {"C", xmlUCSIsCatC},
+ {"Cc", xmlUCSIsCatCc},
+ {"Cf", xmlUCSIsCatCf},
+ {"Co", xmlUCSIsCatCo},
+ {"Cs", xmlUCSIsCatCs},
+ {"L", xmlUCSIsCatL},
+ {"Ll", xmlUCSIsCatLl},
+ {"Lm", xmlUCSIsCatLm},
+ {"Lo", xmlUCSIsCatLo},
+ {"Lt", xmlUCSIsCatLt},
+ {"Lu", xmlUCSIsCatLu},
+ {"M", xmlUCSIsCatM},
+ {"Mc", xmlUCSIsCatMc},
+ {"Me", xmlUCSIsCatMe},
+ {"Mn", xmlUCSIsCatMn},
+ {"N", xmlUCSIsCatN},
+ {"Nd", xmlUCSIsCatNd},
+ {"Nl", xmlUCSIsCatNl},
+ {"No", xmlUCSIsCatNo},
+ {"P", xmlUCSIsCatP},
+ {"Pc", xmlUCSIsCatPc},
+ {"Pd", xmlUCSIsCatPd},
+ {"Pe", xmlUCSIsCatPe},
+ {"Pf", xmlUCSIsCatPf},
+ {"Pi", xmlUCSIsCatPi},
+ {"Po", xmlUCSIsCatPo},
+ {"Ps", xmlUCSIsCatPs},
+ {"S", xmlUCSIsCatS},
+ {"Sc", xmlUCSIsCatSc},
+ {"Sk", xmlUCSIsCatSk},
+ {"Sm", xmlUCSIsCatSm},
+ {"So", xmlUCSIsCatSo},
+ {"Z", xmlUCSIsCatZ},
+ {"Zl", xmlUCSIsCatZl},
+ {"Zp", xmlUCSIsCatZp},
+ {"Zs", xmlUCSIsCatZs}};
+
+static const xmlChSRange xmlCS[] = {{0x0, 0x1f}, {0x7f, 0x9f},
+ {0xad, 0xad}, {0x600, 0x603}, {0x6dd, 0x6dd}, {0x70f, 0x70f},
+ {0x17b4, 0x17b5}, {0x200b, 0x200f}, {0x202a, 0x202e}, {0x2060, 0x2063},
+ {0x206a, 0x206f}, {0xd800, 0xf8ff}, /* surrogates + private use, whole span */
+ {0xfeff, 0xfeff}, {0xfff9, 0xfffb} };
+static const xmlChLRange xmlCL[] = {{0x1d173, 0x1d17a}, {0xe0001, 0xe0001},
+ {0xe0020, 0xe007f}, /* below: private-use First..Last rows as full ranges */
+ {0xf0000, 0xffffd}, {0x100000, 0x10fffd} };
+static xmlChRangeGroup xmlCG = {14,5,xmlCS,xmlCL}; /* entry counts of xmlCS, xmlCL */
+
+static const xmlChSRange xmlCfS[] = {{0xad, 0xad}, {0x600, 0x603},
+ {0x6dd, 0x6dd}, {0x70f, 0x70f}, {0x17b4, 0x17b5}, {0x200b, 0x200f},
+ {0x202a, 0x202e}, {0x2060, 0x2063}, {0x206a, 0x206f}, {0xfeff, 0xfeff},
+ {0xfff9, 0xfffb} };
+static const xmlChLRange xmlCfL[] = {{0x1d173, 0x1d17a}, {0xe0001, 0xe0001},
+ {0xe0020, 0xe007f} };
+static xmlChRangeGroup xmlCfG = {11,3,xmlCfS,xmlCfL}; /* 11 entries in xmlCfS, 3 in xmlCfL */
+
+static const xmlChSRange xmlLS[] = {{0x41, 0x5a}, {0x61, 0x7a},
+ {0xaa, 0xaa}, {0xb5, 0xb5}, {0xba, 0xba}, {0xc0, 0xd6}, {0xd8, 0xf6},
+ {0xf8, 0x236}, {0x250, 0x2c1}, {0x2c6, 0x2d1}, {0x2e0, 0x2e4},
+ {0x2ee, 0x2ee}, {0x37a, 0x37a}, {0x386, 0x386}, {0x388, 0x38a},
+ {0x38c, 0x38c}, {0x38e, 0x3a1}, {0x3a3, 0x3ce}, {0x3d0, 0x3f5},
+ {0x3f7, 0x3fb}, {0x400, 0x481}, {0x48a, 0x4ce}, {0x4d0, 0x4f5},
+ {0x4f8, 0x4f9}, {0x500, 0x50f}, {0x531, 0x556}, {0x559, 0x559},
+ {0x561, 0x587}, {0x5d0, 0x5ea}, {0x5f0, 0x5f2}, {0x621, 0x63a},
+ {0x640, 0x64a}, {0x66e, 0x66f}, {0x671, 0x6d3}, {0x6d5, 0x6d5},
+ {0x6e5, 0x6e6}, {0x6ee, 0x6ef}, {0x6fa, 0x6fc}, {0x6ff, 0x6ff},
+ {0x710, 0x710}, {0x712, 0x72f}, {0x74d, 0x74f}, {0x780, 0x7a5},
+ {0x7b1, 0x7b1}, {0x904, 0x939}, {0x93d, 0x93d}, {0x950, 0x950},
+ {0x958, 0x961}, {0x985, 0x98c}, {0x98f, 0x990}, {0x993, 0x9a8},
+ {0x9aa, 0x9b0}, {0x9b2, 0x9b2}, {0x9b6, 0x9b9}, {0x9bd, 0x9bd},
+ {0x9dc, 0x9dd}, {0x9df, 0x9e1}, {0x9f0, 0x9f1}, {0xa05, 0xa0a},
+ {0xa0f, 0xa10}, {0xa13, 0xa28}, {0xa2a, 0xa30}, {0xa32, 0xa33},
+ {0xa35, 0xa36}, {0xa38, 0xa39}, {0xa59, 0xa5c}, {0xa5e, 0xa5e},
+ {0xa72, 0xa74}, {0xa85, 0xa8d}, {0xa8f, 0xa91}, {0xa93, 0xaa8},
+ {0xaaa, 0xab0}, {0xab2, 0xab3}, {0xab5, 0xab9}, {0xabd, 0xabd},
+ {0xad0, 0xad0}, {0xae0, 0xae1}, {0xb05, 0xb0c}, {0xb0f, 0xb10},
+ {0xb13, 0xb28}, {0xb2a, 0xb30}, {0xb32, 0xb33}, {0xb35, 0xb39},
+ {0xb3d, 0xb3d}, {0xb5c, 0xb5d}, {0xb5f, 0xb61}, {0xb71, 0xb71},
+ {0xb83, 0xb83}, {0xb85, 0xb8a}, {0xb8e, 0xb90}, {0xb92, 0xb95},
+ {0xb99, 0xb9a}, {0xb9c, 0xb9c}, {0xb9e, 0xb9f}, {0xba3, 0xba4},
+ {0xba8, 0xbaa}, {0xbae, 0xbb5}, {0xbb7, 0xbb9}, {0xc05, 0xc0c},
+ {0xc0e, 0xc10}, {0xc12, 0xc28}, {0xc2a, 0xc33}, {0xc35, 0xc39},
+ {0xc60, 0xc61}, {0xc85, 0xc8c}, {0xc8e, 0xc90}, {0xc92, 0xca8},
+ {0xcaa, 0xcb3}, {0xcb5, 0xcb9}, {0xcbd, 0xcbd}, {0xcde, 0xcde},
+ {0xce0, 0xce1}, {0xd05, 0xd0c}, {0xd0e, 0xd10}, {0xd12, 0xd28},
+ {0xd2a, 0xd39}, {0xd60, 0xd61}, {0xd85, 0xd96}, {0xd9a, 0xdb1},
+ {0xdb3, 0xdbb}, {0xdbd, 0xdbd}, {0xdc0, 0xdc6}, {0xe01, 0xe30},
+ {0xe32, 0xe33}, {0xe40, 0xe46}, {0xe81, 0xe82}, {0xe84, 0xe84},
+ {0xe87, 0xe88}, {0xe8a, 0xe8a}, {0xe8d, 0xe8d}, {0xe94, 0xe97},
+ {0xe99, 0xe9f}, {0xea1, 0xea3}, {0xea5, 0xea5}, {0xea7, 0xea7},
+ {0xeaa, 0xeab}, {0xead, 0xeb0}, {0xeb2, 0xeb3}, {0xebd, 0xebd},
+ {0xec0, 0xec4}, {0xec6, 0xec6}, {0xedc, 0xedd}, {0xf00, 0xf00},
+ {0xf40, 0xf47}, {0xf49, 0xf6a}, {0xf88, 0xf8b}, {0x1000, 0x1021},
+ {0x1023, 0x1027}, {0x1029, 0x102a}, {0x1050, 0x1055}, {0x10a0, 0x10c5},
+ {0x10d0, 0x10f8}, {0x1100, 0x1159}, {0x115f, 0x11a2}, {0x11a8, 0x11f9},
+ {0x1200, 0x1206}, {0x1208, 0x1246}, {0x1248, 0x1248}, {0x124a, 0x124d},
+ {0x1250, 0x1256}, {0x1258, 0x1258}, {0x125a, 0x125d}, {0x1260, 0x1286},
+ {0x1288, 0x1288}, {0x128a, 0x128d}, {0x1290, 0x12ae}, {0x12b0, 0x12b0},
+ {0x12b2, 0x12b5}, {0x12b8, 0x12be}, {0x12c0, 0x12c0}, {0x12c2, 0x12c5},
+ {0x12c8, 0x12ce}, {0x12d0, 0x12d6}, {0x12d8, 0x12ee}, {0x12f0, 0x130e},
+ {0x1310, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x131e}, {0x1320, 0x1346},
+ {0x1348, 0x135a}, {0x13a0, 0x13f4}, {0x1401, 0x166c}, {0x166f, 0x1676},
+ {0x1681, 0x169a}, {0x16a0, 0x16ea}, {0x1700, 0x170c}, {0x170e, 0x1711},
+ {0x1720, 0x1731}, {0x1740, 0x1751}, {0x1760, 0x176c}, {0x176e, 0x1770},
+ {0x1780, 0x17b3}, {0x17d7, 0x17d7}, {0x17dc, 0x17dc}, {0x1820, 0x1877},
+ {0x1880, 0x18a8}, {0x1900, 0x191c}, {0x1950, 0x196d}, {0x1970, 0x1974},
+ {0x1d00, 0x1d6b}, {0x1e00, 0x1e9b}, {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15},
+ {0x1f18, 0x1f1d}, {0x1f20, 0x1f45}, {0x1f48, 0x1f4d}, {0x1f50, 0x1f57},
+ {0x1f59, 0x1f59}, {0x1f5b, 0x1f5b}, {0x1f5d, 0x1f5d}, {0x1f5f, 0x1f7d},
+ {0x1f80, 0x1fb4}, {0x1fb6, 0x1fbc}, {0x1fbe, 0x1fbe}, {0x1fc2, 0x1fc4},
+ {0x1fc6, 0x1fcc}, {0x1fd0, 0x1fd3}, {0x1fd6, 0x1fdb}, {0x1fe0, 0x1fec},
+ {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffc}, {0x2071, 0x2071}, {0x207f, 0x207f},
+ {0x2102, 0x2102}, {0x2107, 0x2107}, {0x210a, 0x2113}, {0x2115, 0x2115},
+ {0x2119, 0x211d}, {0x2124, 0x2124}, {0x2126, 0x2126}, {0x2128, 0x2128},
+ {0x212a, 0x212d}, {0x212f, 0x2131}, {0x2133, 0x2139}, {0x213d, 0x213f},
+ {0x2145, 0x2149}, {0x3005, 0x3006}, {0x3031, 0x3035}, {0x303b, 0x303c},
+ {0x3041, 0x3096}, {0x309d, 0x309f}, {0x30a1, 0x30fa}, {0x30fc, 0x30ff},
+ {0x3105, 0x312c}, {0x3131, 0x318e}, {0x31a0, 0x31b7}, {0x31f0, 0x31ff},
+ {0x3400, 0x4db5}, {0x4e00, 0x9fa5}, {0xa000, 0xa48c}, {0xac00, 0xd7a3},
+ {0xf900, 0xfa2d}, /* 0x3400.., 0x4e00.., 0xac00..: First/Last rows merged */
+ {0xfa30, 0xfa6a}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1d, 0xfb1d},
+ {0xfb1f, 0xfb28}, {0xfb2a, 0xfb36}, {0xfb38, 0xfb3c}, {0xfb3e, 0xfb3e},
+ {0xfb40, 0xfb41}, {0xfb43, 0xfb44}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfd3d},
+ {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb}, {0xfe70, 0xfe74},
+ {0xfe76, 0xfefc}, {0xff21, 0xff3a}, {0xff41, 0xff5a}, {0xff66, 0xffbe},
+ {0xffc2, 0xffc7}, {0xffca, 0xffcf}, {0xffd2, 0xffd7}, {0xffda, 0xffdc} };
+static const xmlChLRange xmlLL[] = {{0x10000, 0x1000b}, {0x1000d, 0x10026},
+ {0x10028, 0x1003a}, {0x1003c, 0x1003d}, {0x1003f, 0x1004d},
+ {0x10050, 0x1005d}, {0x10080, 0x100fa}, {0x10300, 0x1031e},
+ {0x10330, 0x10349}, {0x10380, 0x1039d}, {0x10400, 0x1049d},
+ {0x10800, 0x10805}, {0x10808, 0x10808}, {0x1080a, 0x10835},
+ {0x10837, 0x10838}, {0x1083c, 0x1083c}, {0x1083f, 0x1083f},
+ {0x1d400, 0x1d454}, {0x1d456, 0x1d49c}, {0x1d49e, 0x1d49f},
+ {0x1d4a2, 0x1d4a2}, {0x1d4a5, 0x1d4a6}, {0x1d4a9, 0x1d4ac},
+ {0x1d4ae, 0x1d4b9}, {0x1d4bb, 0x1d4bb}, {0x1d4bd, 0x1d4c3},
+ {0x1d4c5, 0x1d505}, {0x1d507, 0x1d50a}, {0x1d50d, 0x1d514},
+ {0x1d516, 0x1d51c}, {0x1d51e, 0x1d539}, {0x1d53b, 0x1d53e},
+ {0x1d540, 0x1d544}, {0x1d546, 0x1d546}, {0x1d54a, 0x1d550},
+ {0x1d552, 0x1d6a3}, {0x1d6a8, 0x1d6c0}, {0x1d6c2, 0x1d6da},
+ {0x1d6dc, 0x1d6fa}, {0x1d6fc, 0x1d714}, {0x1d716, 0x1d734},
+ {0x1d736, 0x1d74e}, {0x1d750, 0x1d76e}, {0x1d770, 0x1d788},
+ {0x1d78a, 0x1d7a8}, {0x1d7aa, 0x1d7c2}, {0x1d7c4, 0x1d7c9},
+ {0x20000, 0x2a6d6}, {0x2f800, 0x2fa1d} }; /* 0x20000..: First/Last merged */
+static xmlChRangeGroup xmlLG = {276,49,xmlLS,xmlLL}; /* entry counts of xmlLS, xmlLL */
+
+static const xmlChSRange xmlLlS[] = {{0x61, 0x7a}, {0xaa, 0xaa},
+ {0xb5, 0xb5}, {0xba, 0xba}, {0xdf, 0xf6}, {0xf8, 0xff}, {0x101, 0x101},
+ {0x103, 0x103}, {0x105, 0x105}, {0x107, 0x107}, {0x109, 0x109},
+ {0x10b, 0x10b}, {0x10d, 0x10d}, {0x10f, 0x10f}, {0x111, 0x111},
+ {0x113, 0x113}, {0x115, 0x115}, {0x117, 0x117}, {0x119, 0x119},
+ {0x11b, 0x11b}, {0x11d, 0x11d}, {0x11f, 0x11f}, {0x121, 0x121},
+ {0x123, 0x123}, {0x125, 0x125}, {0x127, 0x127}, {0x129, 0x129},
+ {0x12b, 0x12b}, {0x12d, 0x12d}, {0x12f, 0x12f}, {0x131, 0x131},
+ {0x133, 0x133}, {0x135, 0x135}, {0x137, 0x138}, {0x13a, 0x13a},
+ {0x13c, 0x13c}, {0x13e, 0x13e}, {0x140, 0x140}, {0x142, 0x142},
+ {0x144, 0x144}, {0x146, 0x146}, {0x148, 0x149}, {0x14b, 0x14b},
+ {0x14d, 0x14d}, {0x14f, 0x14f}, {0x151, 0x151}, {0x153, 0x153},
+ {0x155, 0x155}, {0x157, 0x157}, {0x159, 0x159}, {0x15b, 0x15b},
+ {0x15d, 0x15d}, {0x15f, 0x15f}, {0x161, 0x161}, {0x163, 0x163},
+ {0x165, 0x165}, {0x167, 0x167}, {0x169, 0x169}, {0x16b, 0x16b},
+ {0x16d, 0x16d}, {0x16f, 0x16f}, {0x171, 0x171}, {0x173, 0x173},
+ {0x175, 0x175}, {0x177, 0x177}, {0x17a, 0x17a}, {0x17c, 0x17c},
+ {0x17e, 0x180}, {0x183, 0x183}, {0x185, 0x185}, {0x188, 0x188},
+ {0x18c, 0x18d}, {0x192, 0x192}, {0x195, 0x195}, {0x199, 0x19b},
+ {0x19e, 0x19e}, {0x1a1, 0x1a1}, {0x1a3, 0x1a3}, {0x1a5, 0x1a5},
+ {0x1a8, 0x1a8}, {0x1aa, 0x1ab}, {0x1ad, 0x1ad}, {0x1b0, 0x1b0},
+ {0x1b4, 0x1b4}, {0x1b6, 0x1b6}, {0x1b9, 0x1ba}, {0x1bd, 0x1bf},
+ {0x1c6, 0x1c6}, {0x1c9, 0x1c9}, {0x1cc, 0x1cc}, {0x1ce, 0x1ce},
+ {0x1d0, 0x1d0}, {0x1d2, 0x1d2}, {0x1d4, 0x1d4}, {0x1d6, 0x1d6},
+ {0x1d8, 0x1d8}, {0x1da, 0x1da}, {0x1dc, 0x1dd}, {0x1df, 0x1df},
+ {0x1e1, 0x1e1}, {0x1e3, 0x1e3}, {0x1e5, 0x1e5}, {0x1e7, 0x1e7},
+ {0x1e9, 0x1e9}, {0x1eb, 0x1eb}, {0x1ed, 0x1ed}, {0x1ef, 0x1f0},
+ {0x1f3, 0x1f3}, {0x1f5, 0x1f5}, {0x1f9, 0x1f9}, {0x1fb, 0x1fb},
+ {0x1fd, 0x1fd}, {0x1ff, 0x1ff}, {0x201, 0x201}, {0x203, 0x203},
+ {0x205, 0x205}, {0x207, 0x207}, {0x209, 0x209}, {0x20b, 0x20b},
+ {0x20d, 0x20d}, {0x20f, 0x20f}, {0x211, 0x211}, {0x213, 0x213},
+ {0x215, 0x215}, {0x217, 0x217}, {0x219, 0x219}, {0x21b, 0x21b},
+ {0x21d, 0x21d}, {0x21f, 0x21f}, {0x221, 0x221}, {0x223, 0x223},
+ {0x225, 0x225}, {0x227, 0x227}, {0x229, 0x229}, {0x22b, 0x22b},
+ {0x22d, 0x22d}, {0x22f, 0x22f}, {0x231, 0x231}, {0x233, 0x236},
+ {0x250, 0x2af}, {0x390, 0x390}, {0x3ac, 0x3ce}, {0x3d0, 0x3d1},
+ {0x3d5, 0x3d7}, {0x3d9, 0x3d9}, {0x3db, 0x3db}, {0x3dd, 0x3dd},
+ {0x3df, 0x3df}, {0x3e1, 0x3e1}, {0x3e3, 0x3e3}, {0x3e5, 0x3e5},
+ {0x3e7, 0x3e7}, {0x3e9, 0x3e9}, {0x3eb, 0x3eb}, {0x3ed, 0x3ed},
+ {0x3ef, 0x3f3}, {0x3f5, 0x3f5}, {0x3f8, 0x3f8}, {0x3fb, 0x3fb},
+ {0x430, 0x45f}, {0x461, 0x461}, {0x463, 0x463}, {0x465, 0x465},
+ {0x467, 0x467}, {0x469, 0x469}, {0x46b, 0x46b}, {0x46d, 0x46d},
+ {0x46f, 0x46f}, {0x471, 0x471}, {0x473, 0x473}, {0x475, 0x475},
+ {0x477, 0x477}, {0x479, 0x479}, {0x47b, 0x47b}, {0x47d, 0x47d},
+ {0x47f, 0x47f}, {0x481, 0x481}, {0x48b, 0x48b}, {0x48d, 0x48d},
+ {0x48f, 0x48f}, {0x491, 0x491}, {0x493, 0x493}, {0x495, 0x495},
+ {0x497, 0x497}, {0x499, 0x499}, {0x49b, 0x49b}, {0x49d, 0x49d},
+ {0x49f, 0x49f}, {0x4a1, 0x4a1}, {0x4a3, 0x4a3}, {0x4a5, 0x4a5},
+ {0x4a7, 0x4a7}, {0x4a9, 0x4a9}, {0x4ab, 0x4ab}, {0x4ad, 0x4ad},
+ {0x4af, 0x4af}, {0x4b1, 0x4b1}, {0x4b3, 0x4b3}, {0x4b5, 0x4b5},
+ {0x4b7, 0x4b7}, {0x4b9, 0x4b9}, {0x4bb, 0x4bb}, {0x4bd, 0x4bd},
+ {0x4bf, 0x4bf}, {0x4c2, 0x4c2}, {0x4c4, 0x4c4}, {0x4c6, 0x4c6},
+ {0x4c8, 0x4c8}, {0x4ca, 0x4ca}, {0x4cc, 0x4cc}, {0x4ce, 0x4ce},
+ {0x4d1, 0x4d1}, {0x4d3, 0x4d3}, {0x4d5, 0x4d5}, {0x4d7, 0x4d7},
+ {0x4d9, 0x4d9}, {0x4db, 0x4db}, {0x4dd, 0x4dd}, {0x4df, 0x4df},
+ {0x4e1, 0x4e1}, {0x4e3, 0x4e3}, {0x4e5, 0x4e5}, {0x4e7, 0x4e7},
+ {0x4e9, 0x4e9}, {0x4eb, 0x4eb}, {0x4ed, 0x4ed}, {0x4ef, 0x4ef},
+ {0x4f1, 0x4f1}, {0x4f3, 0x4f3}, {0x4f5, 0x4f5}, {0x4f9, 0x4f9},
+ {0x501, 0x501}, {0x503, 0x503}, {0x505, 0x505}, {0x507, 0x507},
+ {0x509, 0x509}, {0x50b, 0x50b}, {0x50d, 0x50d}, {0x50f, 0x50f},
+ {0x561, 0x587}, {0x1d00, 0x1d2b}, {0x1d62, 0x1d6b}, {0x1e01, 0x1e01},
+ {0x1e03, 0x1e03}, {0x1e05, 0x1e05}, {0x1e07, 0x1e07}, {0x1e09, 0x1e09},
+ {0x1e0b, 0x1e0b}, {0x1e0d, 0x1e0d}, {0x1e0f, 0x1e0f}, {0x1e11, 0x1e11},
+ {0x1e13, 0x1e13}, {0x1e15, 0x1e15}, {0x1e17, 0x1e17}, {0x1e19, 0x1e19},
+ {0x1e1b, 0x1e1b}, {0x1e1d, 0x1e1d}, {0x1e1f, 0x1e1f}, {0x1e21, 0x1e21},
+ {0x1e23, 0x1e23}, {0x1e25, 0x1e25}, {0x1e27, 0x1e27}, {0x1e29, 0x1e29},
+ {0x1e2b, 0x1e2b}, {0x1e2d, 0x1e2d}, {0x1e2f, 0x1e2f}, {0x1e31, 0x1e31},
+ {0x1e33, 0x1e33}, {0x1e35, 0x1e35}, {0x1e37, 0x1e37}, {0x1e39, 0x1e39},
+ {0x1e3b, 0x1e3b}, {0x1e3d, 0x1e3d}, {0x1e3f, 0x1e3f}, {0x1e41, 0x1e41},
+ {0x1e43, 0x1e43}, {0x1e45, 0x1e45}, {0x1e47, 0x1e47}, {0x1e49, 0x1e49},
+ {0x1e4b, 0x1e4b}, {0x1e4d, 0x1e4d}, {0x1e4f, 0x1e4f}, {0x1e51, 0x1e51},
+ {0x1e53, 0x1e53}, {0x1e55, 0x1e55}, {0x1e57, 0x1e57}, {0x1e59, 0x1e59},
+ {0x1e5b, 0x1e5b}, {0x1e5d, 0x1e5d}, {0x1e5f, 0x1e5f}, {0x1e61, 0x1e61},
+ {0x1e63, 0x1e63}, {0x1e65, 0x1e65}, {0x1e67, 0x1e67}, {0x1e69, 0x1e69},
+ {0x1e6b, 0x1e6b}, {0x1e6d, 0x1e6d}, {0x1e6f, 0x1e6f}, {0x1e71, 0x1e71},
+ {0x1e73, 0x1e73}, {0x1e75, 0x1e75}, {0x1e77, 0x1e77}, {0x1e79, 0x1e79},
+ {0x1e7b, 0x1e7b}, {0x1e7d, 0x1e7d}, {0x1e7f, 0x1e7f}, {0x1e81, 0x1e81},
+ {0x1e83, 0x1e83}, {0x1e85, 0x1e85}, {0x1e87, 0x1e87}, {0x1e89, 0x1e89},
+ {0x1e8b, 0x1e8b}, {0x1e8d, 0x1e8d}, {0x1e8f, 0x1e8f}, {0x1e91, 0x1e91},
+ {0x1e93, 0x1e93}, {0x1e95, 0x1e9b}, {0x1ea1, 0x1ea1}, {0x1ea3, 0x1ea3},
+ {0x1ea5, 0x1ea5}, {0x1ea7, 0x1ea7}, {0x1ea9, 0x1ea9}, {0x1eab, 0x1eab},
+ {0x1ead, 0x1ead}, {0x1eaf, 0x1eaf}, {0x1eb1, 0x1eb1}, {0x1eb3, 0x1eb3},
+ {0x1eb5, 0x1eb5}, {0x1eb7, 0x1eb7}, {0x1eb9, 0x1eb9}, {0x1ebb, 0x1ebb},
+ {0x1ebd, 0x1ebd}, {0x1ebf, 0x1ebf}, {0x1ec1, 0x1ec1}, {0x1ec3, 0x1ec3},
+ {0x1ec5, 0x1ec5}, {0x1ec7, 0x1ec7}, {0x1ec9, 0x1ec9}, {0x1ecb, 0x1ecb},
+ {0x1ecd, 0x1ecd}, {0x1ecf, 0x1ecf}, {0x1ed1, 0x1ed1}, {0x1ed3, 0x1ed3},
+ {0x1ed5, 0x1ed5}, {0x1ed7, 0x1ed7}, {0x1ed9, 0x1ed9}, {0x1edb, 0x1edb},
+ {0x1edd, 0x1edd}, {0x1edf, 0x1edf}, {0x1ee1, 0x1ee1}, {0x1ee3, 0x1ee3},
+ {0x1ee5, 0x1ee5}, {0x1ee7, 0x1ee7}, {0x1ee9, 0x1ee9}, {0x1eeb, 0x1eeb},
+ {0x1eed, 0x1eed}, {0x1eef, 0x1eef}, {0x1ef1, 0x1ef1}, {0x1ef3, 0x1ef3},
+ {0x1ef5, 0x1ef5}, {0x1ef7, 0x1ef7}, {0x1ef9, 0x1ef9}, {0x1f00, 0x1f07},
+ {0x1f10, 0x1f15}, {0x1f20, 0x1f27}, {0x1f30, 0x1f37}, {0x1f40, 0x1f45},
+ {0x1f50, 0x1f57}, {0x1f60, 0x1f67}, {0x1f70, 0x1f7d}, {0x1f80, 0x1f87},
+ {0x1f90, 0x1f97}, {0x1fa0, 0x1fa7}, {0x1fb0, 0x1fb4}, {0x1fb6, 0x1fb7},
+ {0x1fbe, 0x1fbe}, {0x1fc2, 0x1fc4}, {0x1fc6, 0x1fc7}, {0x1fd0, 0x1fd3},
+ {0x1fd6, 0x1fd7}, {0x1fe0, 0x1fe7}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ff7},
+ {0x2071, 0x2071}, {0x207f, 0x207f}, {0x210a, 0x210a}, {0x210e, 0x210f},
+ {0x2113, 0x2113}, {0x212f, 0x212f}, {0x2134, 0x2134}, {0x2139, 0x2139},
+ {0x213d, 0x213d}, {0x2146, 0x2149}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17},
+ {0xff41, 0xff5a} };
+static const xmlChLRange xmlLlL[] = {{0x10428, 0x1044f}, {0x1d41a, 0x1d433},
+ {0x1d44e, 0x1d454}, {0x1d456, 0x1d467}, {0x1d482, 0x1d49b},
+ {0x1d4b6, 0x1d4b9}, {0x1d4bb, 0x1d4bb}, {0x1d4bd, 0x1d4c3},
+ {0x1d4c5, 0x1d4cf}, {0x1d4ea, 0x1d503}, {0x1d51e, 0x1d537},
+ {0x1d552, 0x1d56b}, {0x1d586, 0x1d59f}, {0x1d5ba, 0x1d5d3},
+ {0x1d5ee, 0x1d607}, {0x1d622, 0x1d63b}, {0x1d656, 0x1d66f},
+ {0x1d68a, 0x1d6a3}, {0x1d6c2, 0x1d6da}, {0x1d6dc, 0x1d6e1},
+ {0x1d6fc, 0x1d714}, {0x1d716, 0x1d71b}, {0x1d736, 0x1d74e},
+ {0x1d750, 0x1d755}, {0x1d770, 0x1d788}, {0x1d78a, 0x1d78f},
+ {0x1d7aa, 0x1d7c2}, {0x1d7c4, 0x1d7c9} };
+static xmlChRangeGroup xmlLlG = {396,28,xmlLlS,xmlLlL}; /* 396 entries in xmlLlS, 28 in xmlLlL */
+
+static const xmlChSRange xmlLmS[] = {{0x2b0, 0x2c1}, {0x2c6, 0x2d1},
+ {0x2e0, 0x2e4}, {0x2ee, 0x2ee}, {0x37a, 0x37a}, {0x559, 0x559},
+ {0x640, 0x640}, {0x6e5, 0x6e6}, {0xe46, 0xe46}, {0xec6, 0xec6},
+ {0x17d7, 0x17d7}, {0x1843, 0x1843}, {0x1d2c, 0x1d61}, {0x3005, 0x3005},
+ {0x3031, 0x3035}, {0x303b, 0x303b}, {0x309d, 0x309e}, {0x30fc, 0x30fe},
+ {0xff70, 0xff70}, {0xff9e, 0xff9f} };
+static xmlChRangeGroup xmlLmG = {20,0,xmlLmS,NULL}; /* 20 entries in xmlLmS, no second table */
+
+static const xmlChSRange xmlLoS[] = {{0x1bb, 0x1bb}, {0x1c0, 0x1c3},
+ {0x5d0, 0x5ea}, {0x5f0, 0x5f2}, {0x621, 0x63a}, {0x641, 0x64a},
+ {0x66e, 0x66f}, {0x671, 0x6d3}, {0x6d5, 0x6d5}, {0x6ee, 0x6ef},
+ {0x6fa, 0x6fc}, {0x6ff, 0x6ff}, {0x710, 0x710}, {0x712, 0x72f},
+ {0x74d, 0x74f}, {0x780, 0x7a5}, {0x7b1, 0x7b1}, {0x904, 0x939},
+ {0x93d, 0x93d}, {0x950, 0x950}, {0x958, 0x961}, {0x985, 0x98c},
+ {0x98f, 0x990}, {0x993, 0x9a8}, {0x9aa, 0x9b0}, {0x9b2, 0x9b2},
+ {0x9b6, 0x9b9}, {0x9bd, 0x9bd}, {0x9dc, 0x9dd}, {0x9df, 0x9e1},
+ {0x9f0, 0x9f1}, {0xa05, 0xa0a}, {0xa0f, 0xa10}, {0xa13, 0xa28},
+ {0xa2a, 0xa30}, {0xa32, 0xa33}, {0xa35, 0xa36}, {0xa38, 0xa39},
+ {0xa59, 0xa5c}, {0xa5e, 0xa5e}, {0xa72, 0xa74}, {0xa85, 0xa8d},
+ {0xa8f, 0xa91}, {0xa93, 0xaa8}, {0xaaa, 0xab0}, {0xab2, 0xab3},
+ {0xab5, 0xab9}, {0xabd, 0xabd}, {0xad0, 0xad0}, {0xae0, 0xae1},
+ {0xb05, 0xb0c}, {0xb0f, 0xb10}, {0xb13, 0xb28}, {0xb2a, 0xb30},
+ {0xb32, 0xb33}, {0xb35, 0xb39}, {0xb3d, 0xb3d}, {0xb5c, 0xb5d},
+ {0xb5f, 0xb61}, {0xb71, 0xb71}, {0xb83, 0xb83}, {0xb85, 0xb8a},
+ {0xb8e, 0xb90}, {0xb92, 0xb95}, {0xb99, 0xb9a}, {0xb9c, 0xb9c},
+ {0xb9e, 0xb9f}, {0xba3, 0xba4}, {0xba8, 0xbaa}, {0xbae, 0xbb5},
+ {0xbb7, 0xbb9}, {0xc05, 0xc0c}, {0xc0e, 0xc10}, {0xc12, 0xc28},
+ {0xc2a, 0xc33}, {0xc35, 0xc39}, {0xc60, 0xc61}, {0xc85, 0xc8c},
+ {0xc8e, 0xc90}, {0xc92, 0xca8}, {0xcaa, 0xcb3}, {0xcb5, 0xcb9},
+ {0xcbd, 0xcbd}, {0xcde, 0xcde}, {0xce0, 0xce1}, {0xd05, 0xd0c},
+ {0xd0e, 0xd10}, {0xd12, 0xd28}, {0xd2a, 0xd39}, {0xd60, 0xd61},
+ {0xd85, 0xd96}, {0xd9a, 0xdb1}, {0xdb3, 0xdbb}, {0xdbd, 0xdbd},
+ {0xdc0, 0xdc6}, {0xe01, 0xe30}, {0xe32, 0xe33}, {0xe40, 0xe45},
+ {0xe81, 0xe82}, {0xe84, 0xe84}, {0xe87, 0xe88}, {0xe8a, 0xe8a},
+ {0xe8d, 0xe8d}, {0xe94, 0xe97}, {0xe99, 0xe9f}, {0xea1, 0xea3},
+ {0xea5, 0xea5}, {0xea7, 0xea7}, {0xeaa, 0xeab}, {0xead, 0xeb0},
+ {0xeb2, 0xeb3}, {0xebd, 0xebd}, {0xec0, 0xec4}, {0xedc, 0xedd},
+ {0xf00, 0xf00}, {0xf40, 0xf47}, {0xf49, 0xf6a}, {0xf88, 0xf8b},
+ {0x1000, 0x1021}, {0x1023, 0x1027}, {0x1029, 0x102a}, {0x1050, 0x1055},
+ {0x10d0, 0x10f8}, {0x1100, 0x1159}, {0x115f, 0x11a2}, {0x11a8, 0x11f9},
+ {0x1200, 0x1206}, {0x1208, 0x1246}, {0x1248, 0x1248}, {0x124a, 0x124d},
+ {0x1250, 0x1256}, {0x1258, 0x1258}, {0x125a, 0x125d}, {0x1260, 0x1286},
+ {0x1288, 0x1288}, {0x128a, 0x128d}, {0x1290, 0x12ae}, {0x12b0, 0x12b0},
+ {0x12b2, 0x12b5}, {0x12b8, 0x12be}, {0x12c0, 0x12c0}, {0x12c2, 0x12c5},
+ {0x12c8, 0x12ce}, {0x12d0, 0x12d6}, {0x12d8, 0x12ee}, {0x12f0, 0x130e},
+ {0x1310, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x131e}, {0x1320, 0x1346},
+ {0x1348, 0x135a}, {0x13a0, 0x13f4}, {0x1401, 0x166c}, {0x166f, 0x1676},
+ {0x1681, 0x169a}, {0x16a0, 0x16ea}, {0x1700, 0x170c}, {0x170e, 0x1711},
+ {0x1720, 0x1731}, {0x1740, 0x1751}, {0x1760, 0x176c}, {0x176e, 0x1770},
+ {0x1780, 0x17b3}, {0x17dc, 0x17dc}, {0x1820, 0x1842}, {0x1844, 0x1877},
+ {0x1880, 0x18a8}, {0x1900, 0x191c}, {0x1950, 0x196d}, {0x1970, 0x1974},
+ {0x2135, 0x2138}, {0x3006, 0x3006}, {0x303c, 0x303c}, {0x3041, 0x3096},
+ {0x309f, 0x309f}, {0x30a1, 0x30fa}, {0x30ff, 0x30ff}, {0x3105, 0x312c},
+ {0x3131, 0x318e}, {0x31a0, 0x31b7}, {0x31f0, 0x31ff}, {0x3400, 0x4db5},
+ {0x4e00, 0x9fa5}, {0xa000, 0xa48c}, {0xac00, 0xd7a3}, /* First/Last rows merged */
+ {0xf900, 0xfa2d}, {0xfa30, 0xfa6a},
+ {0xfb1d, 0xfb1d}, {0xfb1f, 0xfb28}, {0xfb2a, 0xfb36}, {0xfb38, 0xfb3c},
+ {0xfb3e, 0xfb3e}, {0xfb40, 0xfb41}, {0xfb43, 0xfb44}, {0xfb46, 0xfbb1},
+ {0xfbd3, 0xfd3d}, {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb},
+ {0xfe70, 0xfe74}, {0xfe76, 0xfefc}, {0xff66, 0xff6f}, {0xff71, 0xff9d},
+ {0xffa0, 0xffbe}, {0xffc2, 0xffc7}, {0xffca, 0xffcf}, {0xffd2, 0xffd7},
+ {0xffda, 0xffdc} };
+static const xmlChLRange xmlLoL[] = {{0x10000, 0x1000b}, {0x1000d, 0x10026},
+ {0x10028, 0x1003a}, {0x1003c, 0x1003d}, {0x1003f, 0x1004d},
+ {0x10050, 0x1005d}, {0x10080, 0x100fa}, {0x10300, 0x1031e},
+ {0x10330, 0x10349}, {0x10380, 0x1039d}, {0x10450, 0x1049d},
+ {0x10800, 0x10805}, {0x10808, 0x10808}, {0x1080a, 0x10835},
+ {0x10837, 0x10838}, {0x1083c, 0x1083c}, {0x1083f, 0x1083f},
+ {0x20000, 0x2a6d6}, {0x2f800, 0x2fa1d} }; /* 0x20000..: First/Last merged */
+static xmlChRangeGroup xmlLoG = {208,19,xmlLoS,xmlLoL}; /* entry counts of xmlLoS, xmlLoL */
+
+static const xmlChSRange xmlLtS[] = {{0x1c5, 0x1c5}, {0x1c8, 0x1c8},
+ {0x1cb, 0x1cb}, {0x1f2, 0x1f2}, {0x1f88, 0x1f8f}, {0x1f98, 0x1f9f},
+ {0x1fa8, 0x1faf}, {0x1fbc, 0x1fbc}, {0x1fcc, 0x1fcc}, {0x1ffc, 0x1ffc} };
+static xmlChRangeGroup xmlLtG = {10,0,xmlLtS,NULL}; /* 10 entries in xmlLtS, no second table */
+
+static const xmlChSRange xmlLuS[] = {{0x41, 0x5a}, {0xc0, 0xd6},
+ {0xd8, 0xde}, {0x100, 0x100}, {0x102, 0x102}, {0x104, 0x104},
+ {0x106, 0x106}, {0x108, 0x108}, {0x10a, 0x10a}, {0x10c, 0x10c},
+ {0x10e, 0x10e}, {0x110, 0x110}, {0x112, 0x112}, {0x114, 0x114},
+ {0x116, 0x116}, {0x118, 0x118}, {0x11a, 0x11a}, {0x11c, 0x11c},
+ {0x11e, 0x11e}, {0x120, 0x120}, {0x122, 0x122}, {0x124, 0x124},
+ {0x126, 0x126}, {0x128, 0x128}, {0x12a, 0x12a}, {0x12c, 0x12c},
+ {0x12e, 0x12e}, {0x130, 0x130}, {0x132, 0x132}, {0x134, 0x134},
+ {0x136, 0x136}, {0x139, 0x139}, {0x13b, 0x13b}, {0x13d, 0x13d},
+ {0x13f, 0x13f}, {0x141, 0x141}, {0x143, 0x143}, {0x145, 0x145},
+ {0x147, 0x147}, {0x14a, 0x14a}, {0x14c, 0x14c}, {0x14e, 0x14e},
+ {0x150, 0x150}, {0x152, 0x152}, {0x154, 0x154}, {0x156, 0x156},
+ {0x158, 0x158}, {0x15a, 0x15a}, {0x15c, 0x15c}, {0x15e, 0x15e},
+ {0x160, 0x160}, {0x162, 0x162}, {0x164, 0x164}, {0x166, 0x166},
+ {0x168, 0x168}, {0x16a, 0x16a}, {0x16c, 0x16c}, {0x16e, 0x16e},
+ {0x170, 0x170}, {0x172, 0x172}, {0x174, 0x174}, {0x176, 0x176},
+ {0x178, 0x179}, {0x17b, 0x17b}, {0x17d, 0x17d}, {0x181, 0x182},
+ {0x184, 0x184}, {0x186, 0x187}, {0x189, 0x18b}, {0x18e, 0x191},
+ {0x193, 0x194}, {0x196, 0x198}, {0x19c, 0x19d}, {0x19f, 0x1a0},
+ {0x1a2, 0x1a2}, {0x1a4, 0x1a4}, {0x1a6, 0x1a7}, {0x1a9, 0x1a9},
+ {0x1ac, 0x1ac}, {0x1ae, 0x1af}, {0x1b1, 0x1b3}, {0x1b5, 0x1b5},
+ {0x1b7, 0x1b8}, {0x1bc, 0x1bc}, {0x1c4, 0x1c4}, {0x1c7, 0x1c7},
+ {0x1ca, 0x1ca}, {0x1cd, 0x1cd}, {0x1cf, 0x1cf}, {0x1d1, 0x1d1},
+ {0x1d3, 0x1d3}, {0x1d5, 0x1d5}, {0x1d7, 0x1d7}, {0x1d9, 0x1d9},
+ {0x1db, 0x1db}, {0x1de, 0x1de}, {0x1e0, 0x1e0}, {0x1e2, 0x1e2},
+ {0x1e4, 0x1e4}, {0x1e6, 0x1e6}, {0x1e8, 0x1e8}, {0x1ea, 0x1ea},
+ {0x1ec, 0x1ec}, {0x1ee, 0x1ee}, {0x1f1, 0x1f1}, {0x1f4, 0x1f4},
+ {0x1f6, 0x1f8}, {0x1fa, 0x1fa}, {0x1fc, 0x1fc}, {0x1fe, 0x1fe},
+ {0x200, 0x200}, {0x202, 0x202}, {0x204, 0x204}, {0x206, 0x206},
+ {0x208, 0x208}, {0x20a, 0x20a}, {0x20c, 0x20c}, {0x20e, 0x20e},
+ {0x210, 0x210}, {0x212, 0x212}, {0x214, 0x214}, {0x216, 0x216},
+ {0x218, 0x218}, {0x21a, 0x21a}, {0x21c, 0x21c}, {0x21e, 0x21e},
+ {0x220, 0x220}, {0x222, 0x222}, {0x224, 0x224}, {0x226, 0x226},
+ {0x228, 0x228}, {0x22a, 0x22a}, {0x22c, 0x22c}, {0x22e, 0x22e},
+ {0x230, 0x230}, {0x232, 0x232}, {0x386, 0x386}, {0x388, 0x38a},
+ {0x38c, 0x38c}, {0x38e, 0x38f}, {0x391, 0x3a1}, {0x3a3, 0x3ab},
+ {0x3d2, 0x3d4}, {0x3d8, 0x3d8}, {0x3da, 0x3da}, {0x3dc, 0x3dc},
+ {0x3de, 0x3de}, {0x3e0, 0x3e0}, {0x3e2, 0x3e2}, {0x3e4, 0x3e4},
+ {0x3e6, 0x3e6}, {0x3e8, 0x3e8}, {0x3ea, 0x3ea}, {0x3ec, 0x3ec},
+ {0x3ee, 0x3ee}, {0x3f4, 0x3f4}, {0x3f7, 0x3f7}, {0x3f9, 0x3fa},
+ {0x400, 0x42f}, {0x460, 0x460}, {0x462, 0x462}, {0x464, 0x464},
+ {0x466, 0x466}, {0x468, 0x468}, {0x46a, 0x46a}, {0x46c, 0x46c},
+ {0x46e, 0x46e}, {0x470, 0x470}, {0x472, 0x472}, {0x474, 0x474},
+ {0x476, 0x476}, {0x478, 0x478}, {0x47a, 0x47a}, {0x47c, 0x47c},
+ {0x47e, 0x47e}, {0x480, 0x480}, {0x48a, 0x48a}, {0x48c, 0x48c},
+ {0x48e, 0x48e}, {0x490, 0x490}, {0x492, 0x492}, {0x494, 0x494},
+ {0x496, 0x496}, {0x498, 0x498}, {0x49a, 0x49a}, {0x49c, 0x49c},
+ {0x49e, 0x49e}, {0x4a0, 0x4a0}, {0x4a2, 0x4a2}, {0x4a4, 0x4a4},
+ {0x4a6, 0x4a6}, {0x4a8, 0x4a8}, {0x4aa, 0x4aa}, {0x4ac, 0x4ac},
+ {0x4ae, 0x4ae}, {0x4b0, 0x4b0}, {0x4b2, 0x4b2}, {0x4b4, 0x4b4},
+ {0x4b6, 0x4b6}, {0x4b8, 0x4b8}, {0x4ba, 0x4ba}, {0x4bc, 0x4bc},
+ {0x4be, 0x4be}, {0x4c0, 0x4c1}, {0x4c3, 0x4c3}, {0x4c5, 0x4c5},
+ {0x4c7, 0x4c7}, {0x4c9, 0x4c9}, {0x4cb, 0x4cb}, {0x4cd, 0x4cd},
+ {0x4d0, 0x4d0}, {0x4d2, 0x4d2}, {0x4d4, 0x4d4}, {0x4d6, 0x4d6},
+ {0x4d8, 0x4d8}, {0x4da, 0x4da}, {0x4dc, 0x4dc}, {0x4de, 0x4de},
+ {0x4e0, 0x4e0}, {0x4e2, 0x4e2}, {0x4e4, 0x4e4}, {0x4e6, 0x4e6},
+ {0x4e8, 0x4e8}, {0x4ea, 0x4ea}, {0x4ec, 0x4ec}, {0x4ee, 0x4ee},
+ {0x4f0, 0x4f0}, {0x4f2, 0x4f2}, {0x4f4, 0x4f4}, {0x4f8, 0x4f8},
+ {0x500, 0x500}, {0x502, 0x502}, {0x504, 0x504}, {0x506, 0x506},
+ {0x508, 0x508}, {0x50a, 0x50a}, {0x50c, 0x50c}, {0x50e, 0x50e},
+ {0x531, 0x556}, {0x10a0, 0x10c5}, {0x1e00, 0x1e00}, {0x1e02, 0x1e02},
+ {0x1e04, 0x1e04}, {0x1e06, 0x1e06}, {0x1e08, 0x1e08}, {0x1e0a, 0x1e0a},
+ {0x1e0c, 0x1e0c}, {0x1e0e, 0x1e0e}, {0x1e10, 0x1e10}, {0x1e12, 0x1e12},
+ {0x1e14, 0x1e14}, {0x1e16, 0x1e16}, {0x1e18, 0x1e18}, {0x1e1a, 0x1e1a},
+ {0x1e1c, 0x1e1c}, {0x1e1e, 0x1e1e}, {0x1e20, 0x1e20}, {0x1e22, 0x1e22},
+ {0x1e24, 0x1e24}, {0x1e26, 0x1e26}, {0x1e28, 0x1e28}, {0x1e2a, 0x1e2a},
+ {0x1e2c, 0x1e2c}, {0x1e2e, 0x1e2e}, {0x1e30, 0x1e30}, {0x1e32, 0x1e32},
+ {0x1e34, 0x1e34}, {0x1e36, 0x1e36}, {0x1e38, 0x1e38}, {0x1e3a, 0x1e3a},
+ {0x1e3c, 0x1e3c}, {0x1e3e, 0x1e3e}, {0x1e40, 0x1e40}, {0x1e42, 0x1e42},
+ {0x1e44, 0x1e44}, {0x1e46, 0x1e46}, {0x1e48, 0x1e48}, {0x1e4a, 0x1e4a},
+ {0x1e4c, 0x1e4c}, {0x1e4e, 0x1e4e}, {0x1e50, 0x1e50}, {0x1e52, 0x1e52},
+ {0x1e54, 0x1e54}, {0x1e56, 0x1e56}, {0x1e58, 0x1e58}, {0x1e5a, 0x1e5a},
+ {0x1e5c, 0x1e5c}, {0x1e5e, 0x1e5e}, {0x1e60, 0x1e60}, {0x1e62, 0x1e62},
+ {0x1e64, 0x1e64}, {0x1e66, 0x1e66}, {0x1e68, 0x1e68}, {0x1e6a, 0x1e6a},
+ {0x1e6c, 0x1e6c}, {0x1e6e, 0x1e6e}, {0x1e70, 0x1e70}, {0x1e72, 0x1e72},
+ {0x1e74, 0x1e74}, {0x1e76, 0x1e76}, {0x1e78, 0x1e78}, {0x1e7a, 0x1e7a},
+ {0x1e7c, 0x1e7c}, {0x1e7e, 0x1e7e}, {0x1e80, 0x1e80}, {0x1e82, 0x1e82},
+ {0x1e84, 0x1e84}, {0x1e86, 0x1e86}, {0x1e88, 0x1e88}, {0x1e8a, 0x1e8a},
+ {0x1e8c, 0x1e8c}, {0x1e8e, 0x1e8e}, {0x1e90, 0x1e90}, {0x1e92, 0x1e92},
+ {0x1e94, 0x1e94}, {0x1ea0, 0x1ea0}, {0x1ea2, 0x1ea2}, {0x1ea4, 0x1ea4},
+ {0x1ea6, 0x1ea6}, {0x1ea8, 0x1ea8}, {0x1eaa, 0x1eaa}, {0x1eac, 0x1eac},
+ {0x1eae, 0x1eae}, {0x1eb0, 0x1eb0}, {0x1eb2, 0x1eb2}, {0x1eb4, 0x1eb4},
+ {0x1eb6, 0x1eb6}, {0x1eb8, 0x1eb8}, {0x1eba, 0x1eba}, {0x1ebc, 0x1ebc},
+ {0x1ebe, 0x1ebe}, {0x1ec0, 0x1ec0}, {0x1ec2, 0x1ec2}, {0x1ec4, 0x1ec4},
+ {0x1ec6, 0x1ec6}, {0x1ec8, 0x1ec8}, {0x1eca, 0x1eca}, {0x1ecc, 0x1ecc},
+ {0x1ece, 0x1ece}, {0x1ed0, 0x1ed0}, {0x1ed2, 0x1ed2}, {0x1ed4, 0x1ed4},
+ {0x1ed6, 0x1ed6}, {0x1ed8, 0x1ed8}, {0x1eda, 0x1eda}, {0x1edc, 0x1edc},
+ {0x1ede, 0x1ede}, {0x1ee0, 0x1ee0}, {0x1ee2, 0x1ee2}, {0x1ee4, 0x1ee4},
+ {0x1ee6, 0x1ee6}, {0x1ee8, 0x1ee8}, {0x1eea, 0x1eea}, {0x1eec, 0x1eec},
+ {0x1eee, 0x1eee}, {0x1ef0, 0x1ef0}, {0x1ef2, 0x1ef2}, {0x1ef4, 0x1ef4},
+ {0x1ef6, 0x1ef6}, {0x1ef8, 0x1ef8}, {0x1f08, 0x1f0f}, {0x1f18, 0x1f1d},
+ {0x1f28, 0x1f2f}, {0x1f38, 0x1f3f}, {0x1f48, 0x1f4d}, {0x1f59, 0x1f59},
+ {0x1f5b, 0x1f5b}, {0x1f5d, 0x1f5d}, {0x1f5f, 0x1f5f}, {0x1f68, 0x1f6f},
+ {0x1fb8, 0x1fbb}, {0x1fc8, 0x1fcb}, {0x1fd8, 0x1fdb}, {0x1fe8, 0x1fec},
+ {0x1ff8, 0x1ffb}, {0x2102, 0x2102}, {0x2107, 0x2107}, {0x210b, 0x210d},
+ {0x2110, 0x2112}, {0x2115, 0x2115}, {0x2119, 0x211d}, {0x2124, 0x2124},
+ {0x2126, 0x2126}, {0x2128, 0x2128}, {0x212a, 0x212d}, {0x2130, 0x2131},
+ {0x2133, 0x2133}, {0x213e, 0x213f}, {0x2145, 0x2145}, {0xff21, 0xff3a} };
+static const xmlChLRange xmlLuL[] = {{0x10400, 0x10427}, {0x1d400, 0x1d419},
+ {0x1d434, 0x1d44d}, {0x1d468, 0x1d481}, {0x1d49c, 0x1d49c},
+ {0x1d49e, 0x1d49f}, {0x1d4a2, 0x1d4a2}, {0x1d4a5, 0x1d4a6},
+ {0x1d4a9, 0x1d4ac}, {0x1d4ae, 0x1d4b5}, {0x1d4d0, 0x1d4e9},
+ {0x1d504, 0x1d505}, {0x1d507, 0x1d50a}, {0x1d50d, 0x1d514},
+ {0x1d516, 0x1d51c}, {0x1d538, 0x1d539}, {0x1d53b, 0x1d53e},
+ {0x1d540, 0x1d544}, {0x1d546, 0x1d546}, {0x1d54a, 0x1d550},
+ {0x1d56c, 0x1d585}, {0x1d5a0, 0x1d5b9}, {0x1d5d4, 0x1d5ed},
+ {0x1d608, 0x1d621}, {0x1d63c, 0x1d655}, {0x1d670, 0x1d689},
+ {0x1d6a8, 0x1d6c0}, {0x1d6e2, 0x1d6fa}, {0x1d71c, 0x1d734},
+ {0x1d756, 0x1d76e}, {0x1d790, 0x1d7a8} };
+/* Group descriptor tying together xmlLuS and xmlLuL.  31 equals the number
+ * of ranges in xmlLuL (all above 0xFFFF); 390 is presumably the number of
+ * ranges in xmlLuS -- confirm against that array.  Presumably the Unicode
+ * general category "Lu" (uppercase letters). */
+static xmlChRangeGroup xmlLuG = {390,31,xmlLuS,xmlLuL};
+
+static const xmlChSRange xmlMS[] = {{0x300, 0x357}, {0x35d, 0x36f},
+ {0x483, 0x486}, {0x488, 0x489}, {0x591, 0x5a1}, {0x5a3, 0x5b9},
+ {0x5bb, 0x5bd}, {0x5bf, 0x5bf}, {0x5c1, 0x5c2}, {0x5c4, 0x5c4},
+ {0x610, 0x615}, {0x64b, 0x658}, {0x670, 0x670}, {0x6d6, 0x6dc},
+ {0x6de, 0x6e4}, {0x6e7, 0x6e8}, {0x6ea, 0x6ed}, {0x711, 0x711},
+ {0x730, 0x74a}, {0x7a6, 0x7b0}, {0x901, 0x903}, {0x93c, 0x93c},
+ {0x93e, 0x94d}, {0x951, 0x954}, {0x962, 0x963}, {0x981, 0x983},
+ {0x9bc, 0x9bc}, {0x9be, 0x9c4}, {0x9c7, 0x9c8}, {0x9cb, 0x9cd},
+ {0x9d7, 0x9d7}, {0x9e2, 0x9e3}, {0xa01, 0xa03}, {0xa3c, 0xa3c},
+ {0xa3e, 0xa42}, {0xa47, 0xa48}, {0xa4b, 0xa4d}, {0xa70, 0xa71},
+ {0xa81, 0xa83}, {0xabc, 0xabc}, {0xabe, 0xac5}, {0xac7, 0xac9},
+ {0xacb, 0xacd}, {0xae2, 0xae3}, {0xb01, 0xb03}, {0xb3c, 0xb3c},
+ {0xb3e, 0xb43}, {0xb47, 0xb48}, {0xb4b, 0xb4d}, {0xb56, 0xb57},
+ {0xb82, 0xb82}, {0xbbe, 0xbc2}, {0xbc6, 0xbc8}, {0xbca, 0xbcd},
+ {0xbd7, 0xbd7}, {0xc01, 0xc03}, {0xc3e, 0xc44}, {0xc46, 0xc48},
+ {0xc4a, 0xc4d}, {0xc55, 0xc56}, {0xc82, 0xc83}, {0xcbc, 0xcbc},
+ {0xcbe, 0xcc4}, {0xcc6, 0xcc8}, {0xcca, 0xccd}, {0xcd5, 0xcd6},
+ {0xd02, 0xd03}, {0xd3e, 0xd43}, {0xd46, 0xd48}, {0xd4a, 0xd4d},
+ {0xd57, 0xd57}, {0xd82, 0xd83}, {0xdca, 0xdca}, {0xdcf, 0xdd4},
+ {0xdd6, 0xdd6}, {0xdd8, 0xddf}, {0xdf2, 0xdf3}, {0xe31, 0xe31},
+ {0xe34, 0xe3a}, {0xe47, 0xe4e}, {0xeb1, 0xeb1}, {0xeb4, 0xeb9},
+ {0xebb, 0xebc}, {0xec8, 0xecd}, {0xf18, 0xf19}, {0xf35, 0xf35},
+ {0xf37, 0xf37}, {0xf39, 0xf39}, {0xf3e, 0xf3f}, {0xf71, 0xf84},
+ {0xf86, 0xf87}, {0xf90, 0xf97}, {0xf99, 0xfbc}, {0xfc6, 0xfc6},
+ {0x102c, 0x1032}, {0x1036, 0x1039}, {0x1056, 0x1059}, {0x1712, 0x1714},
+ {0x1732, 0x1734}, {0x1752, 0x1753}, {0x1772, 0x1773}, {0x17b6, 0x17d3},
+ {0x17dd, 0x17dd}, {0x180b, 0x180d}, {0x18a9, 0x18a9}, {0x1920, 0x192b},
+ {0x1930, 0x193b}, {0x20d0, 0x20ea}, {0x302a, 0x302f}, {0x3099, 0x309a},
+ {0xfb1e, 0xfb1e}, {0xfe00, 0xfe0f}, {0xfe20, 0xfe23} };
+static const xmlChLRange xmlML[] = {{0x1d165, 0x1d169}, {0x1d16d, 0x1d172},
+ {0x1d17b, 0x1d182}, {0x1d185, 0x1d18b}, {0x1d1aa, 0x1d1ad},
+ {0xe0100, 0xe01ef} };
+/* Group descriptor for xmlMS / xmlML: 113 ranges in xmlMS (all <= 0xFFFF)
+ * and 6 ranges in xmlML (all > 0xFFFF); both counts equal the array
+ * lengths and must be kept in sync with them.  Presumably the Unicode
+ * general category "M" (marks). */
+static xmlChRangeGroup xmlMG = {113,6,xmlMS,xmlML};
+
+static const xmlChSRange xmlMcS[] = {{0x903, 0x903}, {0x93e, 0x940},
+ {0x949, 0x94c}, {0x982, 0x983}, {0x9be, 0x9c0}, {0x9c7, 0x9c8},
+ {0x9cb, 0x9cc}, {0x9d7, 0x9d7}, {0xa03, 0xa03}, {0xa3e, 0xa40},
+ {0xa83, 0xa83}, {0xabe, 0xac0}, {0xac9, 0xac9}, {0xacb, 0xacc},
+ {0xb02, 0xb03}, {0xb3e, 0xb3e}, {0xb40, 0xb40}, {0xb47, 0xb48},
+ {0xb4b, 0xb4c}, {0xb57, 0xb57}, {0xbbe, 0xbbf}, {0xbc1, 0xbc2},
+ {0xbc6, 0xbc8}, {0xbca, 0xbcc}, {0xbd7, 0xbd7}, {0xc01, 0xc03},
+ {0xc41, 0xc44}, {0xc82, 0xc83}, {0xcbe, 0xcbe}, {0xcc0, 0xcc4},
+ {0xcc7, 0xcc8}, {0xcca, 0xccb}, {0xcd5, 0xcd6}, {0xd02, 0xd03},
+ {0xd3e, 0xd40}, {0xd46, 0xd48}, {0xd4a, 0xd4c}, {0xd57, 0xd57},
+ {0xd82, 0xd83}, {0xdcf, 0xdd1}, {0xdd8, 0xddf}, {0xdf2, 0xdf3},
+ {0xf3e, 0xf3f}, {0xf7f, 0xf7f}, {0x102c, 0x102c}, {0x1031, 0x1031},
+ {0x1038, 0x1038}, {0x1056, 0x1057}, {0x17b6, 0x17b6}, {0x17be, 0x17c5},
+ {0x17c7, 0x17c8}, {0x1923, 0x1926}, {0x1929, 0x192b}, {0x1930, 0x1931},
+ {0x1933, 0x1938} };
+static const xmlChLRange xmlMcL[] = {{0x1d165, 0x1d166}, {0x1d16d, 0x1d172} };
+/* Group descriptor for xmlMcS / xmlMcL: 55 ranges in xmlMcS (all <= 0xFFFF)
+ * and 2 ranges in xmlMcL (all > 0xFFFF); both counts equal the array
+ * lengths and must be kept in sync with them.  Presumably the Unicode
+ * general category "Mc" (spacing combining marks). */
+static xmlChRangeGroup xmlMcG = {55,2,xmlMcS,xmlMcL};
+
+static const xmlChSRange xmlMnS[] = {{0x300, 0x357}, {0x35d, 0x36f},
+ {0x483, 0x486}, {0x591, 0x5a1}, {0x5a3, 0x5b9}, {0x5bb, 0x5bd},
+ {0x5bf, 0x5bf}, {0x5c1, 0x5c2}, {0x5c4, 0x5c4}, {0x610, 0x615},
+ {0x64b, 0x658}, {0x670, 0x670}, {0x6d6, 0x6dc}, {0x6df, 0x6e4},
+ {0x6e7, 0x6e8}, {0x6ea, 0x6ed}, {0x711, 0x711}, {0x730, 0x74a},
+ {0x7a6, 0x7b0}, {0x901, 0x902}, {0x93c, 0x93c}, {0x941, 0x948},
+ {0x94d, 0x94d}, {0x951, 0x954}, {0x962, 0x963}, {0x981, 0x981},
+ {0x9bc, 0x9bc}, {0x9c1, 0x9c4}, {0x9cd, 0x9cd}, {0x9e2, 0x9e3},
+ {0xa01, 0xa02}, {0xa3c, 0xa3c}, {0xa41, 0xa42}, {0xa47, 0xa48},
+ {0xa4b, 0xa4d}, {0xa70, 0xa71}, {0xa81, 0xa82}, {0xabc, 0xabc},
+ {0xac1, 0xac5}, {0xac7, 0xac8}, {0xacd, 0xacd}, {0xae2, 0xae3},
+ {0xb01, 0xb01}, {0xb3c, 0xb3c}, {0xb3f, 0xb3f}, {0xb41, 0xb43},
+ {0xb4d, 0xb4d}, {0xb56, 0xb56}, {0xb82, 0xb82}, {0xbc0, 0xbc0},
+ {0xbcd, 0xbcd}, {0xc3e, 0xc40}, {0xc46, 0xc48}, {0xc4a, 0xc4d},
+ {0xc55, 0xc56}, {0xcbc, 0xcbc}, {0xcbf, 0xcbf}, {0xcc6, 0xcc6},
+ {0xccc, 0xccd}, {0xd41, 0xd43}, {0xd4d, 0xd4d}, {0xdca, 0xdca},
+ {0xdd2, 0xdd4}, {0xdd6, 0xdd6}, {0xe31, 0xe31}, {0xe34, 0xe3a},
+ {0xe47, 0xe4e}, {0xeb1, 0xeb1}, {0xeb4, 0xeb9}, {0xebb, 0xebc},
+ {0xec8, 0xecd}, {0xf18, 0xf19}, {0xf35, 0xf35}, {0xf37, 0xf37},
+ {0xf39, 0xf39}, {0xf71, 0xf7e}, {0xf80, 0xf84}, {0xf86, 0xf87},
+ {0xf90, 0xf97}, {0xf99, 0xfbc}, {0xfc6, 0xfc6}, {0x102d, 0x1030},
+ {0x1032, 0x1032}, {0x1036, 0x1037}, {0x1039, 0x1039}, {0x1058, 0x1059},
+ {0x1712, 0x1714}, {0x1732, 0x1734}, {0x1752, 0x1753}, {0x1772, 0x1773},
+ {0x17b7, 0x17bd}, {0x17c6, 0x17c6}, {0x17c9, 0x17d3}, {0x17dd, 0x17dd},
+ {0x180b, 0x180d}, {0x18a9, 0x18a9}, {0x1920, 0x1922}, {0x1927, 0x1928},
+ {0x1932, 0x1932}, {0x1939, 0x193b}, {0x20d0, 0x20dc}, {0x20e1, 0x20e1},
+ {0x20e5, 0x20ea}, {0x302a, 0x302f}, {0x3099, 0x309a}, {0xfb1e, 0xfb1e},
+ {0xfe00, 0xfe0f}, {0xfe20, 0xfe23} };
+static const xmlChLRange xmlMnL[] = {{0x1d167, 0x1d169}, {0x1d17b, 0x1d182},
+ {0x1d185, 0x1d18b}, {0x1d1aa, 0x1d1ad}, {0xe0100, 0xe01ef} };
+/* Group descriptor for xmlMnS / xmlMnL: 108 ranges in xmlMnS (all <= 0xFFFF)
+ * and 5 ranges in xmlMnL (all > 0xFFFF); both counts equal the array
+ * lengths and must be kept in sync with them.  Presumably the Unicode
+ * general category "Mn" (non-spacing marks). */
+static xmlChRangeGroup xmlMnG = {108,5,xmlMnS,xmlMnL};
+
+static const xmlChSRange xmlNS[] = {{0x30, 0x39}, {0xb2, 0xb3},
+ {0xb9, 0xb9}, {0xbc, 0xbe}, {0x660, 0x669}, {0x6f0, 0x6f9},
+ {0x966, 0x96f}, {0x9e6, 0x9ef}, {0x9f4, 0x9f9}, {0xa66, 0xa6f},
+ {0xae6, 0xaef}, {0xb66, 0xb6f}, {0xbe7, 0xbf2}, {0xc66, 0xc6f},
+ {0xce6, 0xcef}, {0xd66, 0xd6f}, {0xe50, 0xe59}, {0xed0, 0xed9},
+ {0xf20, 0xf33}, {0x1040, 0x1049}, {0x1369, 0x137c}, {0x16ee, 0x16f0},
+ {0x17e0, 0x17e9}, {0x17f0, 0x17f9}, {0x1810, 0x1819}, {0x1946, 0x194f},
+ {0x2070, 0x2070}, {0x2074, 0x2079}, {0x2080, 0x2089}, {0x2153, 0x2183},
+ {0x2460, 0x249b}, {0x24ea, 0x24ff}, {0x2776, 0x2793}, {0x3007, 0x3007},
+ {0x3021, 0x3029}, {0x3038, 0x303a}, {0x3192, 0x3195}, {0x3220, 0x3229},
+ {0x3251, 0x325f}, {0x3280, 0x3289}, {0x32b1, 0x32bf}, {0xff10, 0xff19} };
+static const xmlChLRange xmlNL[] = {{0x10107, 0x10133}, {0x10320, 0x10323},
+ {0x1034a, 0x1034a}, {0x104a0, 0x104a9}, {0x1d7ce, 0x1d7ff} };
+/* Group descriptor for xmlNS / xmlNL: 42 ranges in xmlNS (all <= 0xFFFF)
+ * and 5 ranges in xmlNL (all > 0xFFFF); both counts equal the array
+ * lengths and must be kept in sync with them.  Presumably the Unicode
+ * general category "N" (numbers). */
+static xmlChRangeGroup xmlNG = {42,5,xmlNS,xmlNL};
+
+static const xmlChSRange xmlNdS[] = {{0x30, 0x39}, {0x660, 0x669},
+ {0x6f0, 0x6f9}, {0x966, 0x96f}, {0x9e6, 0x9ef}, {0xa66, 0xa6f},
+ {0xae6, 0xaef}, {0xb66, 0xb6f}, {0xbe7, 0xbef}, {0xc66, 0xc6f},
+ {0xce6, 0xcef}, {0xd66, 0xd6f}, {0xe50, 0xe59}, {0xed0, 0xed9},
+ {0xf20, 0xf29}, {0x1040, 0x1049}, {0x1369, 0x1371}, {0x17e0, 0x17e9},
+ {0x1810, 0x1819}, {0x1946, 0x194f}, {0xff10, 0xff19} };
+static const xmlChLRange xmlNdL[] = {{0x104a0, 0x104a9}, {0x1d7ce, 0x1d7ff} };
+/* Group descriptor for xmlNdS / xmlNdL: 21 ranges in xmlNdS (all <= 0xFFFF)
+ * and 2 ranges in xmlNdL (all > 0xFFFF); both counts equal the array
+ * lengths and must be kept in sync with them.  Presumably the Unicode
+ * general category "Nd" (decimal digits). */
+static xmlChRangeGroup xmlNdG = {21,2,xmlNdS,xmlNdL};
+
+static const xmlChSRange xmlNoS[] = {{0xb2, 0xb3}, {0xb9, 0xb9},
+ {0xbc, 0xbe}, {0x9f4, 0x9f9}, {0xbf0, 0xbf2}, {0xf2a, 0xf33},
+ {0x1372, 0x137c}, {0x17f0, 0x17f9}, {0x2070, 0x2070}, {0x2074, 0x2079},
+ {0x2080, 0x2089}, {0x2153, 0x215f}, {0x2460, 0x249b}, {0x24ea, 0x24ff},
+ {0x2776, 0x2793}, {0x3192, 0x3195}, {0x3220, 0x3229}, {0x3251, 0x325f},
+ {0x3280, 0x3289}, {0x32b1, 0x32bf} };
+static const xmlChLRange xmlNoL[] = {{0x10107, 0x10133}, {0x10320, 0x10323} };
+/* Group descriptor for xmlNoS / xmlNoL: 20 ranges in xmlNoS (all <= 0xFFFF)
+ * and 2 ranges in xmlNoL (all > 0xFFFF); both counts equal the array
+ * lengths and must be kept in sync with them.  Presumably the Unicode
+ * general category "No" (other numbers). */
+static xmlChRangeGroup xmlNoG = {20,2,xmlNoS,xmlNoL};
+
+static const xmlChSRange xmlPS[] = {{0x21, 0x23}, {0x25, 0x2a},
+ {0x2c, 0x2f}, {0x3a, 0x3b}, {0x3f, 0x40}, {0x5b, 0x5d}, {0x5f, 0x5f},
+ {0x7b, 0x7b}, {0x7d, 0x7d}, {0xa1, 0xa1}, {0xab, 0xab}, {0xb7, 0xb7},
+ {0xbb, 0xbb}, {0xbf, 0xbf}, {0x37e, 0x37e}, {0x387, 0x387},
+ {0x55a, 0x55f}, {0x589, 0x58a}, {0x5be, 0x5be}, {0x5c0, 0x5c0},
+ {0x5c3, 0x5c3}, {0x5f3, 0x5f4}, {0x60c, 0x60d}, {0x61b, 0x61b},
+ {0x61f, 0x61f}, {0x66a, 0x66d}, {0x6d4, 0x6d4}, {0x700, 0x70d},
+ {0x964, 0x965}, {0x970, 0x970}, {0xdf4, 0xdf4}, {0xe4f, 0xe4f},
+ {0xe5a, 0xe5b}, {0xf04, 0xf12}, {0xf3a, 0xf3d}, {0xf85, 0xf85},
+ {0x104a, 0x104f}, {0x10fb, 0x10fb}, {0x1361, 0x1368}, {0x166d, 0x166e},
+ {0x169b, 0x169c}, {0x16eb, 0x16ed}, {0x1735, 0x1736}, {0x17d4, 0x17d6},
+ {0x17d8, 0x17da}, {0x1800, 0x180a}, {0x1944, 0x1945}, {0x2010, 0x2027},
+ {0x2030, 0x2043}, {0x2045, 0x2051}, {0x2053, 0x2054}, {0x2057, 0x2057},
+ {0x207d, 0x207e}, {0x208d, 0x208e}, {0x2329, 0x232a}, {0x23b4, 0x23b6},
+ {0x2768, 0x2775}, {0x27e6, 0x27eb}, {0x2983, 0x2998}, {0x29d8, 0x29db},
+ {0x29fc, 0x29fd}, {0x3001, 0x3003}, {0x3008, 0x3011}, {0x3014, 0x301f},
+ {0x3030, 0x3030}, {0x303d, 0x303d}, {0x30a0, 0x30a0}, {0x30fb, 0x30fb},
+ {0xfd3e, 0xfd3f}, {0xfe30, 0xfe52}, {0xfe54, 0xfe61}, {0xfe63, 0xfe63},
+ {0xfe68, 0xfe68}, {0xfe6a, 0xfe6b}, {0xff01, 0xff03}, {0xff05, 0xff0a},
+ {0xff0c, 0xff0f}, {0xff1a, 0xff1b}, {0xff1f, 0xff20}, {0xff3b, 0xff3d},
+ {0xff3f, 0xff3f}, {0xff5b, 0xff5b}, {0xff5d, 0xff5d}, {0xff5f, 0xff65} };
+static const xmlChLRange xmlPL[] = {{0x10100, 0x10101}, {0x1039f, 0x1039f} };
+/* Group descriptor for xmlPS / xmlPL: 84 ranges in xmlPS (all <= 0xFFFF)
+ * and 2 ranges in xmlPL (all > 0xFFFF); both counts equal the array
+ * lengths and must be kept in sync with them.  Presumably the Unicode
+ * general category "P" (punctuation). */
+static xmlChRangeGroup xmlPG = {84,2,xmlPS,xmlPL};
+
+static const xmlChSRange xmlPdS[] = {{0x2d, 0x2d}, {0x58a, 0x58a},
+ {0x1806, 0x1806}, {0x2010, 0x2015}, {0x301c, 0x301c}, {0x3030, 0x3030},
+ {0x30a0, 0x30a0}, {0xfe31, 0xfe32}, {0xfe58, 0xfe58}, {0xfe63, 0xfe63},
+ {0xff0d, 0xff0d} };
+/* Group descriptor for xmlPdS: 11 ranges, all <= 0xFFFF.  There is no
+ * companion table for code points above 0xFFFF, hence the 0 count and the
+ * NULL pointer.  Presumably the Unicode general category "Pd" (dash
+ * punctuation). */
+static xmlChRangeGroup xmlPdG = {11,0,xmlPdS,NULL};
+
+static const xmlChSRange xmlPeS[] = {{0x29, 0x29}, {0x5d, 0x5d},
+ {0x7d, 0x7d}, {0xf3b, 0xf3b}, {0xf3d, 0xf3d}, {0x169c, 0x169c},
+ {0x2046, 0x2046}, {0x207e, 0x207e}, {0x208e, 0x208e}, {0x232a, 0x232a},
+ {0x23b5, 0x23b5}, {0x2769, 0x2769}, {0x276b, 0x276b}, {0x276d, 0x276d},
+ {0x276f, 0x276f}, {0x2771, 0x2771}, {0x2773, 0x2773}, {0x2775, 0x2775},
+ {0x27e7, 0x27e7}, {0x27e9, 0x27e9}, {0x27eb, 0x27eb}, {0x2984, 0x2984},
+ {0x2986, 0x2986}, {0x2988, 0x2988}, {0x298a, 0x298a}, {0x298c, 0x298c},
+ {0x298e, 0x298e}, {0x2990, 0x2990}, {0x2992, 0x2992}, {0x2994, 0x2994},
+ {0x2996, 0x2996}, {0x2998, 0x2998}, {0x29d9, 0x29d9}, {0x29db, 0x29db},
+ {0x29fd, 0x29fd}, {0x3009, 0x3009}, {0x300b, 0x300b}, {0x300d, 0x300d},
+ {0x300f, 0x300f}, {0x3011, 0x3011}, {0x3015, 0x3015}, {0x3017, 0x3017},
+ {0x3019, 0x3019}, {0x301b, 0x301b}, {0x301e, 0x301f}, {0xfd3f, 0xfd3f},
+ {0xfe36, 0xfe36}, {0xfe38, 0xfe38}, {0xfe3a, 0xfe3a}, {0xfe3c, 0xfe3c},
+ {0xfe3e, 0xfe3e}, {0xfe40, 0xfe40}, {0xfe42, 0xfe42}, {0xfe44, 0xfe44},
+ {0xfe48, 0xfe48}, {0xfe5a, 0xfe5a}, {0xfe5c, 0xfe5c}, {0xfe5e, 0xfe5e},
+ {0xff09, 0xff09}, {0xff3d, 0xff3d}, {0xff5d, 0xff5d}, {0xff60, 0xff60},
+ {0xff63, 0xff63} };
+/* Group descriptor for xmlPeS: 63 ranges, all <= 0xFFFF.  There is no
+ * companion table for code points above 0xFFFF, hence the 0 count and the
+ * NULL pointer.  Presumably the Unicode general category "Pe" (closing
+ * punctuation). */
+static xmlChRangeGroup xmlPeG = {63,0,xmlPeS,NULL};
+
+static const xmlChSRange xmlPoS[] = {{0x21, 0x23}, {0x25, 0x27},
+ {0x2a, 0x2a}, {0x2c, 0x2c}, {0x2e, 0x2f}, {0x3a, 0x3b}, {0x3f, 0x40},
+ {0x5c, 0x5c}, {0xa1, 0xa1}, {0xb7, 0xb7}, {0xbf, 0xbf}, {0x37e, 0x37e},
+ {0x387, 0x387}, {0x55a, 0x55f}, {0x589, 0x589}, {0x5be, 0x5be},
+ {0x5c0, 0x5c0}, {0x5c3, 0x5c3}, {0x5f3, 0x5f4}, {0x60c, 0x60d},
+ {0x61b, 0x61b}, {0x61f, 0x61f}, {0x66a, 0x66d}, {0x6d4, 0x6d4},
+ {0x700, 0x70d}, {0x964, 0x965}, {0x970, 0x970}, {0xdf4, 0xdf4},
+ {0xe4f, 0xe4f}, {0xe5a, 0xe5b}, {0xf04, 0xf12}, {0xf85, 0xf85},
+ {0x104a, 0x104f}, {0x10fb, 0x10fb}, {0x1361, 0x1368}, {0x166d, 0x166e},
+ {0x16eb, 0x16ed}, {0x1735, 0x1736}, {0x17d4, 0x17d6}, {0x17d8, 0x17da},
+ {0x1800, 0x1805}, {0x1807, 0x180a}, {0x1944, 0x1945}, {0x2016, 0x2017},
+ {0x2020, 0x2027}, {0x2030, 0x2038}, {0x203b, 0x203e}, {0x2041, 0x2043},
+ {0x2047, 0x2051}, {0x2053, 0x2053}, {0x2057, 0x2057}, {0x23b6, 0x23b6},
+ {0x3001, 0x3003}, {0x303d, 0x303d}, {0xfe30, 0xfe30}, {0xfe45, 0xfe46},
+ {0xfe49, 0xfe4c}, {0xfe50, 0xfe52}, {0xfe54, 0xfe57}, {0xfe5f, 0xfe61},
+ {0xfe68, 0xfe68}, {0xfe6a, 0xfe6b}, {0xff01, 0xff03}, {0xff05, 0xff07},
+ {0xff0a, 0xff0a}, {0xff0c, 0xff0c}, {0xff0e, 0xff0f}, {0xff1a, 0xff1b},
+ {0xff1f, 0xff20}, {0xff3c, 0xff3c}, {0xff61, 0xff61}, {0xff64, 0xff64} };
+static const xmlChLRange xmlPoL[] = {{0x10100, 0x10101}, {0x1039f, 0x1039f} };
+/* Group descriptor for xmlPoS / xmlPoL: 72 ranges in xmlPoS (all <= 0xFFFF)
+ * and 2 ranges in xmlPoL (all > 0xFFFF); both counts equal the array
+ * lengths and must be kept in sync with them.  Presumably the Unicode
+ * general category "Po" (other punctuation). */
+static xmlChRangeGroup xmlPoG = {72,2,xmlPoS,xmlPoL};
+
+static const xmlChSRange xmlPsS[] = {{0x28, 0x28}, {0x5b, 0x5b},
+ {0x7b, 0x7b}, {0xf3a, 0xf3a}, {0xf3c, 0xf3c}, {0x169b, 0x169b},
+ {0x201a, 0x201a}, {0x201e, 0x201e}, {0x2045, 0x2045}, {0x207d, 0x207d},
+ {0x208d, 0x208d}, {0x2329, 0x2329}, {0x23b4, 0x23b4}, {0x2768, 0x2768},
+ {0x276a, 0x276a}, {0x276c, 0x276c}, {0x276e, 0x276e}, {0x2770, 0x2770},
+ {0x2772, 0x2772}, {0x2774, 0x2774}, {0x27e6, 0x27e6}, {0x27e8, 0x27e8},
+ {0x27ea, 0x27ea}, {0x2983, 0x2983}, {0x2985, 0x2985}, {0x2987, 0x2987},
+ {0x2989, 0x2989}, {0x298b, 0x298b}, {0x298d, 0x298d}, {0x298f, 0x298f},
+ {0x2991, 0x2991}, {0x2993, 0x2993}, {0x2995, 0x2995}, {0x2997, 0x2997},
+ {0x29d8, 0x29d8}, {0x29da, 0x29da}, {0x29fc, 0x29fc}, {0x3008, 0x3008},
+ {0x300a, 0x300a}, {0x300c, 0x300c}, {0x300e, 0x300e}, {0x3010, 0x3010},
+ {0x3014, 0x3014}, {0x3016, 0x3016}, {0x3018, 0x3018}, {0x301a, 0x301a},
+ {0x301d, 0x301d}, {0xfd3e, 0xfd3e}, {0xfe35, 0xfe35}, {0xfe37, 0xfe37},
+ {0xfe39, 0xfe39}, {0xfe3b, 0xfe3b}, {0xfe3d, 0xfe3d}, {0xfe3f, 0xfe3f},
+ {0xfe41, 0xfe41}, {0xfe43, 0xfe43}, {0xfe47, 0xfe47}, {0xfe59, 0xfe59},
+ {0xfe5b, 0xfe5b}, {0xfe5d, 0xfe5d}, {0xff08, 0xff08}, {0xff3b, 0xff3b},
+ {0xff5b, 0xff5b}, {0xff5f, 0xff5f}, {0xff62, 0xff62} };
+/* Group descriptor for xmlPsS: 65 ranges, all <= 0xFFFF.  There is no
+ * companion table for code points above 0xFFFF, hence the 0 count and the
+ * NULL pointer.  Presumably the Unicode general category "Ps" (opening
+ * punctuation). */
+static xmlChRangeGroup xmlPsG = {65,0,xmlPsS,NULL};
+
+static const xmlChSRange xmlSS[] = {{0x24, 0x24}, {0x2b, 0x2b},
+ {0x3c, 0x3e}, {0x5e, 0x5e}, {0x60, 0x60}, {0x7c, 0x7c}, {0x7e, 0x7e},
+ {0xa2, 0xa9}, {0xac, 0xac}, {0xae, 0xb1}, {0xb4, 0xb4}, {0xb6, 0xb6},
+ {0xb8, 0xb8}, {0xd7, 0xd7}, {0xf7, 0xf7}, {0x2c2, 0x2c5},
+ {0x2d2, 0x2df}, {0x2e5, 0x2ed}, {0x2ef, 0x2ff}, {0x374, 0x375},
+ {0x384, 0x385}, {0x3f6, 0x3f6}, {0x482, 0x482}, {0x60e, 0x60f},
+ {0x6e9, 0x6e9}, {0x6fd, 0x6fe}, {0x9f2, 0x9f3}, {0x9fa, 0x9fa},
+ {0xaf1, 0xaf1}, {0xb70, 0xb70}, {0xbf3, 0xbfa}, {0xe3f, 0xe3f},
+ {0xf01, 0xf03}, {0xf13, 0xf17}, {0xf1a, 0xf1f}, {0xf34, 0xf34},
+ {0xf36, 0xf36}, {0xf38, 0xf38}, {0xfbe, 0xfc5}, {0xfc7, 0xfcc},
+ {0xfcf, 0xfcf}, {0x17db, 0x17db}, {0x1940, 0x1940}, {0x19e0, 0x19ff},
+ {0x1fbd, 0x1fbd}, {0x1fbf, 0x1fc1}, {0x1fcd, 0x1fcf}, {0x1fdd, 0x1fdf},
+ {0x1fed, 0x1fef}, {0x1ffd, 0x1ffe}, {0x2044, 0x2044}, {0x2052, 0x2052},
+ {0x207a, 0x207c}, {0x208a, 0x208c}, {0x20a0, 0x20b1}, {0x2100, 0x2101},
+ {0x2103, 0x2106}, {0x2108, 0x2109}, {0x2114, 0x2114}, {0x2116, 0x2118},
+ {0x211e, 0x2123}, {0x2125, 0x2125}, {0x2127, 0x2127}, {0x2129, 0x2129},
+ {0x212e, 0x212e}, {0x2132, 0x2132}, {0x213a, 0x213b}, {0x2140, 0x2144},
+ {0x214a, 0x214b}, {0x2190, 0x2328}, {0x232b, 0x23b3}, {0x23b7, 0x23d0},
+ {0x2400, 0x2426}, {0x2440, 0x244a}, {0x249c, 0x24e9}, {0x2500, 0x2617},
+ {0x2619, 0x267d}, {0x2680, 0x2691}, {0x26a0, 0x26a1}, {0x2701, 0x2704},
+ {0x2706, 0x2709}, {0x270c, 0x2727}, {0x2729, 0x274b}, {0x274d, 0x274d},
+ {0x274f, 0x2752}, {0x2756, 0x2756}, {0x2758, 0x275e}, {0x2761, 0x2767},
+ {0x2794, 0x2794}, {0x2798, 0x27af}, {0x27b1, 0x27be}, {0x27d0, 0x27e5},
+ {0x27f0, 0x2982}, {0x2999, 0x29d7}, {0x29dc, 0x29fb}, {0x29fe, 0x2b0d},
+ {0x2e80, 0x2e99}, {0x2e9b, 0x2ef3}, {0x2f00, 0x2fd5}, {0x2ff0, 0x2ffb},
+ {0x3004, 0x3004}, {0x3012, 0x3013}, {0x3020, 0x3020}, {0x3036, 0x3037},
+ {0x303e, 0x303f}, {0x309b, 0x309c}, {0x3190, 0x3191}, {0x3196, 0x319f},
+ {0x3200, 0x321e}, {0x322a, 0x3243}, {0x3250, 0x3250}, {0x3260, 0x327d},
+ {0x327f, 0x327f}, {0x328a, 0x32b0}, {0x32c0, 0x32fe}, {0x3300, 0x33ff},
+ {0x4dc0, 0x4dff}, {0xa490, 0xa4c6}, {0xfb29, 0xfb29}, {0xfdfc, 0xfdfd},
+ {0xfe62, 0xfe62}, {0xfe64, 0xfe66}, {0xfe69, 0xfe69}, {0xff04, 0xff04},
+ {0xff0b, 0xff0b}, {0xff1c, 0xff1e}, {0xff3e, 0xff3e}, {0xff40, 0xff40},
+ {0xff5c, 0xff5c}, {0xff5e, 0xff5e}, {0xffe0, 0xffe6}, {0xffe8, 0xffee},
+ {0xfffc, 0xfffd} };
+static const xmlChLRange xmlSL[] = {{0x10102, 0x10102}, {0x10137, 0x1013f},
+ {0x1d000, 0x1d0f5}, {0x1d100, 0x1d126}, {0x1d12a, 0x1d164},
+ {0x1d16a, 0x1d16c}, {0x1d183, 0x1d184}, {0x1d18c, 0x1d1a9},
+ {0x1d1ae, 0x1d1dd}, {0x1d300, 0x1d356}, {0x1d6c1, 0x1d6c1},
+ {0x1d6db, 0x1d6db}, {0x1d6fb, 0x1d6fb}, {0x1d715, 0x1d715},
+ {0x1d735, 0x1d735}, {0x1d74f, 0x1d74f}, {0x1d76f, 0x1d76f},
+ {0x1d789, 0x1d789}, {0x1d7a9, 0x1d7a9}, {0x1d7c3, 0x1d7c3} };
+/* Group descriptor for xmlSS / xmlSL: 133 ranges in xmlSS (all <= 0xFFFF)
+ * and 20 ranges in xmlSL (all > 0xFFFF); both counts equal the array
+ * lengths and must be kept in sync with them.  Presumably the Unicode
+ * general category "S" (symbols). */
+static xmlChRangeGroup xmlSG = {133,20,xmlSS,xmlSL};
+
+static const xmlChSRange xmlScS[] = {{0x24, 0x24}, {0xa2, 0xa5},
+ {0x9f2, 0x9f3}, {0xaf1, 0xaf1}, {0xbf9, 0xbf9}, {0xe3f, 0xe3f},
+ {0x17db, 0x17db}, {0x20a0, 0x20b1}, {0xfdfc, 0xfdfc}, {0xfe69, 0xfe69},
+ {0xff04, 0xff04}, {0xffe0, 0xffe1}, {0xffe5, 0xffe6} };
+/* Group descriptor for xmlScS: 13 ranges, all <= 0xFFFF.  There is no
+ * companion table for code points above 0xFFFF, hence the 0 count and the
+ * NULL pointer.  Presumably the Unicode general category "Sc" (currency
+ * symbols). */
+static xmlChRangeGroup xmlScG = {13,0,xmlScS,NULL};
+
+static const xmlChSRange xmlSkS[] = {{0x5e, 0x5e}, {0x60, 0x60},
+ {0xa8, 0xa8}, {0xaf, 0xaf}, {0xb4, 0xb4}, {0xb8, 0xb8}, {0x2c2, 0x2c5},
+ {0x2d2, 0x2df}, {0x2e5, 0x2ed}, {0x2ef, 0x2ff}, {0x374, 0x375},
+ {0x384, 0x385}, {0x1fbd, 0x1fbd}, {0x1fbf, 0x1fc1}, {0x1fcd, 0x1fcf},
+ {0x1fdd, 0x1fdf}, {0x1fed, 0x1fef}, {0x1ffd, 0x1ffe}, {0x309b, 0x309c},
+ {0xff3e, 0xff3e}, {0xff40, 0xff40}, {0xffe3, 0xffe3} };
+/* Group descriptor for xmlSkS: 22 ranges, all <= 0xFFFF.  There is no
+ * companion table for code points above 0xFFFF, hence the 0 count and the
+ * NULL pointer.  Presumably the Unicode general category "Sk" (modifier
+ * symbols). */
+static xmlChRangeGroup xmlSkG = {22,0,xmlSkS,NULL};
+
+static const xmlChSRange xmlSmS[] = {{0x2b, 0x2b}, {0x3c, 0x3e},
+ {0x7c, 0x7c}, {0x7e, 0x7e}, {0xac, 0xac}, {0xb1, 0xb1}, {0xd7, 0xd7},
+ {0xf7, 0xf7}, {0x3f6, 0x3f6}, {0x2044, 0x2044}, {0x2052, 0x2052},
+ {0x207a, 0x207c}, {0x208a, 0x208c}, {0x2140, 0x2144}, {0x214b, 0x214b},
+ {0x2190, 0x2194}, {0x219a, 0x219b}, {0x21a0, 0x21a0}, {0x21a3, 0x21a3},
+ {0x21a6, 0x21a6}, {0x21ae, 0x21ae}, {0x21ce, 0x21cf}, {0x21d2, 0x21d2},
+ {0x21d4, 0x21d4}, {0x21f4, 0x22ff}, {0x2308, 0x230b}, {0x2320, 0x2321},
+ {0x237c, 0x237c}, {0x239b, 0x23b3}, {0x25b7, 0x25b7}, {0x25c1, 0x25c1},
+ {0x25f8, 0x25ff}, {0x266f, 0x266f}, {0x27d0, 0x27e5}, {0x27f0, 0x27ff},
+ {0x2900, 0x2982}, {0x2999, 0x29d7}, {0x29dc, 0x29fb}, {0x29fe, 0x2aff},
+ {0xfb29, 0xfb29}, {0xfe62, 0xfe62}, {0xfe64, 0xfe66}, {0xff0b, 0xff0b},
+ {0xff1c, 0xff1e}, {0xff5c, 0xff5c}, {0xff5e, 0xff5e}, {0xffe2, 0xffe2},
+ {0xffe9, 0xffec} };
+static const xmlChLRange xmlSmL[] = {{0x1d6c1, 0x1d6c1}, {0x1d6db, 0x1d6db},
+ {0x1d6fb, 0x1d6fb}, {0x1d715, 0x1d715}, {0x1d735, 0x1d735},
+ {0x1d74f, 0x1d74f}, {0x1d76f, 0x1d76f}, {0x1d789, 0x1d789},
+ {0x1d7a9, 0x1d7a9}, {0x1d7c3, 0x1d7c3} };
+/* Group descriptor for xmlSmS / xmlSmL: 48 ranges in xmlSmS (all <= 0xFFFF)
+ * and 10 ranges in xmlSmL (all > 0xFFFF); both counts equal the array
+ * lengths and must be kept in sync with them.  Presumably the Unicode
+ * general category "Sm" (math symbols). */
+static xmlChRangeGroup xmlSmG = {48,10,xmlSmS,xmlSmL};
+
+static const xmlChSRange xmlSoS[] = {{0xa6, 0xa7}, {0xa9, 0xa9},
+ {0xae, 0xae}, {0xb0, 0xb0}, {0xb6, 0xb6}, {0x482, 0x482},
+ {0x60e, 0x60f}, {0x6e9, 0x6e9}, {0x6fd, 0x6fe}, {0x9fa, 0x9fa},
+ {0xb70, 0xb70}, {0xbf3, 0xbf8}, {0xbfa, 0xbfa}, {0xf01, 0xf03},
+ {0xf13, 0xf17}, {0xf1a, 0xf1f}, {0xf34, 0xf34}, {0xf36, 0xf36},
+ {0xf38, 0xf38}, {0xfbe, 0xfc5}, {0xfc7, 0xfcc}, {0xfcf, 0xfcf},
+ {0x1940, 0x1940}, {0x19e0, 0x19ff}, {0x2100, 0x2101}, {0x2103, 0x2106},
+ {0x2108, 0x2109}, {0x2114, 0x2114}, {0x2116, 0x2118}, {0x211e, 0x2123},
+ {0x2125, 0x2125}, {0x2127, 0x2127}, {0x2129, 0x2129}, {0x212e, 0x212e},
+ {0x2132, 0x2132}, {0x213a, 0x213b}, {0x214a, 0x214a}, {0x2195, 0x2199},
+ {0x219c, 0x219f}, {0x21a1, 0x21a2}, {0x21a4, 0x21a5}, {0x21a7, 0x21ad},
+ {0x21af, 0x21cd}, {0x21d0, 0x21d1}, {0x21d3, 0x21d3}, {0x21d5, 0x21f3},
+ {0x2300, 0x2307}, {0x230c, 0x231f}, {0x2322, 0x2328}, {0x232b, 0x237b},
+ {0x237d, 0x239a}, {0x23b7, 0x23d0}, {0x2400, 0x2426}, {0x2440, 0x244a},
+ {0x249c, 0x24e9}, {0x2500, 0x25b6}, {0x25b8, 0x25c0}, {0x25c2, 0x25f7},
+ {0x2600, 0x2617}, {0x2619, 0x266e}, {0x2670, 0x267d}, {0x2680, 0x2691},
+ {0x26a0, 0x26a1}, {0x2701, 0x2704}, {0x2706, 0x2709}, {0x270c, 0x2727},
+ {0x2729, 0x274b}, {0x274d, 0x274d}, {0x274f, 0x2752}, {0x2756, 0x2756},
+ {0x2758, 0x275e}, {0x2761, 0x2767}, {0x2794, 0x2794}, {0x2798, 0x27af},
+ {0x27b1, 0x27be}, {0x2800, 0x28ff}, {0x2b00, 0x2b0d}, {0x2e80, 0x2e99},
+ {0x2e9b, 0x2ef3}, {0x2f00, 0x2fd5}, {0x2ff0, 0x2ffb}, {0x3004, 0x3004},
+ {0x3012, 0x3013}, {0x3020, 0x3020}, {0x3036, 0x3037}, {0x303e, 0x303f},
+ {0x3190, 0x3191}, {0x3196, 0x319f}, {0x3200, 0x321e}, {0x322a, 0x3243},
+ {0x3250, 0x3250}, {0x3260, 0x327d}, {0x327f, 0x327f}, {0x328a, 0x32b0},
+ {0x32c0, 0x32fe}, {0x3300, 0x33ff}, {0x4dc0, 0x4dff}, {0xa490, 0xa4c6},
+ {0xfdfd, 0xfdfd}, {0xffe4, 0xffe4}, {0xffe8, 0xffe8}, {0xffed, 0xffee},
+ {0xfffc, 0xfffd} };
+static const xmlChLRange xmlSoL[] = {{0x10102, 0x10102}, {0x10137, 0x1013f},
+ {0x1d000, 0x1d0f5}, {0x1d100, 0x1d126}, {0x1d12a, 0x1d164},
+ {0x1d16a, 0x1d16c}, {0x1d183, 0x1d184}, {0x1d18c, 0x1d1a9},
+ {0x1d1ae, 0x1d1dd}, {0x1d300, 0x1d356} };
+/* Group descriptor for xmlSoS / xmlSoL: 103 ranges in xmlSoS (all <= 0xFFFF)
+ * and 10 ranges in xmlSoL (all > 0xFFFF); both counts equal the array
+ * lengths and must be kept in sync with them.  Presumably the Unicode
+ * general category "So" (other symbols). */
+static xmlChRangeGroup xmlSoG = {103,10,xmlSoS,xmlSoL};
+
+static const xmlChSRange xmlZS[] = {{0x20, 0x20}, {0xa0, 0xa0},
+ {0x1680, 0x1680}, {0x180e, 0x180e}, {0x2000, 0x200a}, {0x2028, 0x2029},
+ {0x202f, 0x202f}, {0x205f, 0x205f}, {0x3000, 0x3000} };
+/* Group descriptor for xmlZS: 9 ranges, all <= 0xFFFF.  There is no
+ * companion table for code points above 0xFFFF, hence the 0 count and the
+ * NULL pointer.  Presumably the Unicode general category "Z"
+ * (separators). */
+static xmlChRangeGroup xmlZG = {9,0,xmlZS,NULL};
+
+/* Name tables of the type searched by xmlUnicodeLookup(), which reads them
+ * through the ->table and ->numentries members.  Each pairs an array of
+ * entries with a count: 128 and 36 are presumably the lengths of
+ * xmlUnicodeBlocks and xmlUnicodeCats (both defined earlier in the file) --
+ * confirm whenever entries are added or removed. */
+static xmlUnicodeNameTable xmlUnicodeBlockTbl = {xmlUnicodeBlocks, 128};
+static xmlUnicodeNameTable xmlUnicodeCatTbl = {xmlUnicodeCats, 36};
+
+/**
+ * xmlUnicodeLookup:
+ * @tptr: pointer to the name table
+ * @tname: name to be found
+ *
+ * Binary search of @tptr->table for an entry whose rangename compares equal
+ * to @tname under strcmp().  The search discards half of the remaining
+ * interval based on the sign of strcmp(), so the table entries must be
+ * sorted in ascending strcmp() order of rangename for every name to be
+ * found.
+ *
+ * Returns the func member of the matching entry, or NULL if no entry
+ * matches or if either argument is NULL
+ */
+static xmlIntFunc
+*xmlUnicodeLookup(xmlUnicodeNameTable *tptr, const char *tname) {
+ int low, high, mid, cmp;
+ xmlUnicodeRange *sptr;
+
+ if ((tptr == NULL) || (tname == NULL)) return(NULL);
+
+ /* [low, high] is the inclusive window of indices still to be searched */
+ low = 0;
+ high = tptr->numentries - 1;
+ sptr = tptr->table;
+ while (low <= high) {
+ mid = (low + high) / 2;
+ if ((cmp=strcmp(tname, sptr[mid].rangename)) == 0)
+ return (sptr[mid].func);
+ /* tname sorts before entry mid: keep the lower half, else the upper */
+ if (cmp < 0)
+ high = mid - 1;
+ else
+ low = mid + 1;
+ }
+ /* window became empty without a match */
+ return (NULL);
+}
+
+/**
+ * xmlUCSIsAegeanNumbers:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of AegeanNumbers UCS Block,
+ * i.e. whether @code lies in the inclusive range 0x10100..0x1013F
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsAegeanNumbers(int code) {
+ return(((code >= 0x10100) && (code <= 0x1013F)));
+}
+
+/**
+ * xmlUCSIsAlphabeticPresentationForms:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of AlphabeticPresentationForms UCS
+ * Block, i.e. whether @code lies in the inclusive range 0xFB00..0xFB4F
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsAlphabeticPresentationForms(int code) {
+ return(((code >= 0xFB00) && (code <= 0xFB4F)));
+}
+
+/**
+ * xmlUCSIsArabic:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Arabic UCS Block,
+ * i.e. whether @code lies in the inclusive range 0x0600..0x06FF
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsArabic(int code) {
+ return(((code >= 0x0600) && (code <= 0x06FF)));
+}
+
+/**
+ * xmlUCSIsArabicPresentationFormsA:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of ArabicPresentationForms-A UCS
+ * Block, i.e. whether @code lies in the inclusive range 0xFB50..0xFDFF
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsArabicPresentationFormsA(int code) {
+ return(((code >= 0xFB50) && (code <= 0xFDFF)));
+}
+
+/**
+ * xmlUCSIsArabicPresentationFormsB:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of ArabicPresentationForms-B UCS
+ * Block, i.e. whether @code lies in the inclusive range 0xFE70..0xFEFF
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsArabicPresentationFormsB(int code) {
+ return(((code >= 0xFE70) && (code <= 0xFEFF)));
+}
+
+/**
+ * xmlUCSIsArmenian:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Armenian UCS Block,
+ * i.e. whether @code lies in the inclusive range 0x0530..0x058F
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsArmenian(int code) {
+ return(((code >= 0x0530) && (code <= 0x058F)));
+}
+
+/**
+ * xmlUCSIsArrows:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Arrows UCS Block,
+ * i.e. whether @code lies in the inclusive range 0x2190..0x21FF
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsArrows(int code) {
+ return(((code >= 0x2190) && (code <= 0x21FF)));
+}
+
+/**
+ * xmlUCSIsBasicLatin:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of BasicLatin UCS Block,
+ * i.e. whether @code lies in the inclusive range 0x0000..0x007F.
+ * @code is a signed int, so the lower bound also rejects negative values.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsBasicLatin(int code) {
+ return(((code >= 0x0000) && (code <= 0x007F)));
+}
+
+/**
+ * xmlUCSIsBengali:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Bengali UCS Block,
+ * i.e. whether @code lies in the inclusive range 0x0980..0x09FF
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsBengali(int code) {
+ return(((code >= 0x0980) && (code <= 0x09FF)));
+}
+
+/**
+ * xmlUCSIsBlockElements:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of BlockElements UCS Block,
+ * i.e. whether @code lies in the inclusive range 0x2580..0x259F
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsBlockElements(int code) {
+ return(((code >= 0x2580) && (code <= 0x259F)));
+}
+
+/**
+ * xmlUCSIsBopomofo:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Bopomofo UCS Block,
+ * i.e. whether @code lies in the inclusive range 0x3100..0x312F
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsBopomofo(int code) {
+ return(((code >= 0x3100) && (code <= 0x312F)));
+}
+
+/**
+ * xmlUCSIsBopomofoExtended:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of BopomofoExtended UCS Block,
+ * i.e. whether @code lies in the inclusive range 0x31A0..0x31BF
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsBopomofoExtended(int code) {
+ return(((code >= 0x31A0) && (code <= 0x31BF)));
+}
+
+/**
+ * xmlUCSIsBoxDrawing:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of BoxDrawing UCS Block,
+ * i.e. whether @code lies in the inclusive range 0x2500..0x257F
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsBoxDrawing(int code) {
+ return(((code >= 0x2500) && (code <= 0x257F)));
+}
+
+/**
+ * xmlUCSIsBraillePatterns:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of BraillePatterns UCS Block,
+ * i.e. whether @code lies in the inclusive range 0x2800..0x28FF
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsBraillePatterns(int code) {
+ return(((code >= 0x2800) && (code <= 0x28FF)));
+}
+
+/**
+ * xmlUCSIsBuhid:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Buhid UCS Block,
+ * i.e. whether @code lies in the inclusive range 0x1740..0x175F
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsBuhid(int code) {
+ return(((code >= 0x1740) && (code <= 0x175F)));
+}
+
+/**
+ * xmlUCSIsByzantineMusicalSymbols:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of ByzantineMusicalSymbols UCS
+ * Block, i.e. whether @code lies in the inclusive range 0x1D000..0x1D0FF
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsByzantineMusicalSymbols(int code) {
+ return(((code >= 0x1D000) && (code <= 0x1D0FF)));
+}
+
+/**
+ * xmlUCSIsCJKCompatibility:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of CJKCompatibility UCS Block,
+ * i.e. whether @code lies in the inclusive range 0x3300..0x33FF
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCJKCompatibility(int code) {
+ return(((code >= 0x3300) && (code <= 0x33FF)));
+}
+
+/**
+ * xmlUCSIsCJKCompatibilityForms:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of CJKCompatibilityForms UCS Block,
+ * i.e. whether @code lies in the inclusive range 0xFE30..0xFE4F
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCJKCompatibilityForms(int code) {
+ return(((code >= 0xFE30) && (code <= 0xFE4F)));
+}
+
+/**
+ * xmlUCSIsCJKCompatibilityIdeographs:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of CJKCompatibilityIdeographs UCS
+ * Block, i.e. whether @code lies in the inclusive range 0xF900..0xFAFF
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCJKCompatibilityIdeographs(int code) {
+ return(((code >= 0xF900) && (code <= 0xFAFF)));
+}
+
+/**
+ * xmlUCSIsCJKCompatibilityIdeographsSupplement:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of
+ * CJKCompatibilityIdeographsSupplement UCS Block, i.e. whether @code lies
+ * in the inclusive range 0x2F800..0x2FA1F
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCJKCompatibilityIdeographsSupplement(int code) {
+ return(((code >= 0x2F800) && (code <= 0x2FA1F)));
+}
+
+/**
+ * xmlUCSIsCJKRadicalsSupplement:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of CJKRadicalsSupplement UCS Block,
+ * i.e. whether @code lies in the inclusive range 0x2E80..0x2EFF
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCJKRadicalsSupplement(int code) {
+ return(((code >= 0x2E80) && (code <= 0x2EFF)));
+}
+
+/**
+ * xmlUCSIsCJKSymbolsandPunctuation:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of CJKSymbolsandPunctuation UCS
+ * Block, i.e. whether @code lies in the inclusive range 0x3000..0x303F
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCJKSymbolsandPunctuation(int code) {
+ return(((code >= 0x3000) && (code <= 0x303F)));
+}
+
+/**
+ * xmlUCSIsCJKUnifiedIdeographs:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of CJKUnifiedIdeographs UCS Block,
+ * i.e. whether @code lies in the inclusive range 0x4E00..0x9FFF
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCJKUnifiedIdeographs(int code) {
+ return(((code >= 0x4E00) && (code <= 0x9FFF)));
+}
+
+/**
+ * xmlUCSIsCJKUnifiedIdeographsExtensionA:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of CJKUnifiedIdeographsExtensionA
+ * UCS Block, i.e. whether @code lies in the inclusive range 0x3400..0x4DBF
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCJKUnifiedIdeographsExtensionA(int code) {
+ return(((code >= 0x3400) && (code <= 0x4DBF)));
+}
+
+/**
+ * xmlUCSIsCJKUnifiedIdeographsExtensionB:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of CJKUnifiedIdeographsExtensionB
+ * UCS Block, i.e. whether @code lies in the inclusive range
+ * 0x20000..0x2A6DF
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCJKUnifiedIdeographsExtensionB(int code) {
+ return(((code >= 0x20000) && (code <= 0x2A6DF)));
+}
+
+/**
+ * xmlUCSIsCherokee:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Cherokee UCS Block,
+ * i.e. whether @code lies in the inclusive range 0x13A0..0x13FF
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCherokee(int code) {
+ return(((code >= 0x13A0) && (code <= 0x13FF)));
+}
+
+/**
+ * xmlUCSIsCombiningDiacriticalMarks:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of CombiningDiacriticalMarks UCS
+ * Block, i.e. whether @code lies in the inclusive range 0x0300..0x036F
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCombiningDiacriticalMarks(int code) {
+ return(((code >= 0x0300) && (code <= 0x036F)));
+}
+
+/**
+ * xmlUCSIsCombiningDiacriticalMarksforSymbols:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the CombiningDiacriticalMarksforSymbols
+ * UCS Block, whose inclusive bounds are 0x20D0 and 0x20FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsCombiningDiacriticalMarksforSymbols(int code) {
+    if ((code < 0x20D0) || (code > 0x20FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsCombiningHalfMarks:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the CombiningHalfMarks UCS Block,
+ * whose inclusive bounds are 0xFE20 and 0xFE2F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsCombiningHalfMarks(int code) {
+    if ((code < 0xFE20) || (code > 0xFE2F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsCombiningMarksforSymbols:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the CombiningMarksforSymbols UCS Block,
+ * whose inclusive bounds are 0x20D0 and 0x20FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsCombiningMarksforSymbols(int code) {
+    if ((code < 0x20D0) || (code > 0x20FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsControlPictures:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the ControlPictures UCS Block,
+ * whose inclusive bounds are 0x2400 and 0x243F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsControlPictures(int code) {
+    if ((code < 0x2400) || (code > 0x243F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsCurrencySymbols:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the CurrencySymbols UCS Block,
+ * whose inclusive bounds are 0x20A0 and 0x20CF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsCurrencySymbols(int code) {
+    if ((code < 0x20A0) || (code > 0x20CF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsCypriotSyllabary:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the CypriotSyllabary UCS Block,
+ * whose inclusive bounds are 0x10800 and 0x1083F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsCypriotSyllabary(int code) {
+    if ((code < 0x10800) || (code > 0x1083F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsCyrillic:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Cyrillic UCS Block,
+ * whose inclusive bounds are 0x0400 and 0x04FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsCyrillic(int code) {
+    if ((code < 0x0400) || (code > 0x04FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsCyrillicSupplement:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the CyrillicSupplement UCS Block,
+ * whose inclusive bounds are 0x0500 and 0x052F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsCyrillicSupplement(int code) {
+    if ((code < 0x0500) || (code > 0x052F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsDeseret:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Deseret UCS Block,
+ * whose inclusive bounds are 0x10400 and 0x1044F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsDeseret(int code) {
+    if ((code < 0x10400) || (code > 0x1044F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsDevanagari:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Devanagari UCS Block,
+ * whose inclusive bounds are 0x0900 and 0x097F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsDevanagari(int code) {
+    if ((code < 0x0900) || (code > 0x097F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsDingbats:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Dingbats UCS Block,
+ * whose inclusive bounds are 0x2700 and 0x27BF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsDingbats(int code) {
+    if ((code < 0x2700) || (code > 0x27BF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsEnclosedAlphanumerics:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the EnclosedAlphanumerics UCS Block,
+ * whose inclusive bounds are 0x2460 and 0x24FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsEnclosedAlphanumerics(int code) {
+    if ((code < 0x2460) || (code > 0x24FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsEnclosedCJKLettersandMonths:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the EnclosedCJKLettersandMonths
+ * UCS Block, whose inclusive bounds are 0x3200 and 0x32FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsEnclosedCJKLettersandMonths(int code) {
+    if ((code < 0x3200) || (code > 0x32FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsEthiopic:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Ethiopic UCS Block,
+ * whose inclusive bounds are 0x1200 and 0x137F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsEthiopic(int code) {
+    if ((code < 0x1200) || (code > 0x137F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsGeneralPunctuation:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the GeneralPunctuation UCS Block,
+ * whose inclusive bounds are 0x2000 and 0x206F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsGeneralPunctuation(int code) {
+    if ((code < 0x2000) || (code > 0x206F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsGeometricShapes:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the GeometricShapes UCS Block,
+ * whose inclusive bounds are 0x25A0 and 0x25FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsGeometricShapes(int code) {
+    if ((code < 0x25A0) || (code > 0x25FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsGeorgian:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Georgian UCS Block,
+ * whose inclusive bounds are 0x10A0 and 0x10FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsGeorgian(int code) {
+    if ((code < 0x10A0) || (code > 0x10FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsGothic:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Gothic UCS Block,
+ * whose inclusive bounds are 0x10330 and 0x1034F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsGothic(int code) {
+    if ((code < 0x10330) || (code > 0x1034F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsGreek:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Greek UCS Block,
+ * whose inclusive bounds are 0x0370 and 0x03FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsGreek(int code) {
+    if ((code < 0x0370) || (code > 0x03FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsGreekExtended:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the GreekExtended UCS Block,
+ * whose inclusive bounds are 0x1F00 and 0x1FFF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsGreekExtended(int code) {
+    if ((code < 0x1F00) || (code > 0x1FFF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsGreekandCoptic:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the GreekandCoptic UCS Block,
+ * whose inclusive bounds are 0x0370 and 0x03FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsGreekandCoptic(int code) {
+    if ((code < 0x0370) || (code > 0x03FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsGujarati:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Gujarati UCS Block,
+ * whose inclusive bounds are 0x0A80 and 0x0AFF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsGujarati(int code) {
+    if ((code < 0x0A80) || (code > 0x0AFF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsGurmukhi:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Gurmukhi UCS Block,
+ * whose inclusive bounds are 0x0A00 and 0x0A7F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsGurmukhi(int code) {
+    if ((code < 0x0A00) || (code > 0x0A7F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsHalfwidthandFullwidthForms:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the HalfwidthandFullwidthForms
+ * UCS Block, whose inclusive bounds are 0xFF00 and 0xFFEF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsHalfwidthandFullwidthForms(int code) {
+    if ((code < 0xFF00) || (code > 0xFFEF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsHangulCompatibilityJamo:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the HangulCompatibilityJamo UCS Block,
+ * whose inclusive bounds are 0x3130 and 0x318F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsHangulCompatibilityJamo(int code) {
+    if ((code < 0x3130) || (code > 0x318F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsHangulJamo:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the HangulJamo UCS Block,
+ * whose inclusive bounds are 0x1100 and 0x11FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsHangulJamo(int code) {
+    if ((code < 0x1100) || (code > 0x11FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsHangulSyllables:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the HangulSyllables UCS Block,
+ * whose inclusive bounds are 0xAC00 and 0xD7AF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsHangulSyllables(int code) {
+    if ((code < 0xAC00) || (code > 0xD7AF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsHanunoo:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Hanunoo UCS Block,
+ * whose inclusive bounds are 0x1720 and 0x173F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsHanunoo(int code) {
+    if ((code < 0x1720) || (code > 0x173F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsHebrew:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Hebrew UCS Block,
+ * whose inclusive bounds are 0x0590 and 0x05FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsHebrew(int code) {
+    if ((code < 0x0590) || (code > 0x05FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsHighPrivateUseSurrogates:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the HighPrivateUseSurrogates UCS Block,
+ * whose inclusive bounds are 0xDB80 and 0xDBFF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsHighPrivateUseSurrogates(int code) {
+    if ((code < 0xDB80) || (code > 0xDBFF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsHighSurrogates:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the HighSurrogates UCS Block,
+ * whose inclusive bounds are 0xD800 and 0xDB7F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsHighSurrogates(int code) {
+    if ((code < 0xD800) || (code > 0xDB7F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsHiragana:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Hiragana UCS Block,
+ * whose inclusive bounds are 0x3040 and 0x309F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsHiragana(int code) {
+    if ((code < 0x3040) || (code > 0x309F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsIPAExtensions:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the IPAExtensions UCS Block,
+ * whose inclusive bounds are 0x0250 and 0x02AF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsIPAExtensions(int code) {
+    if ((code < 0x0250) || (code > 0x02AF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsIdeographicDescriptionCharacters:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the IdeographicDescriptionCharacters
+ * UCS Block, whose inclusive bounds are 0x2FF0 and 0x2FFF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsIdeographicDescriptionCharacters(int code) {
+    if ((code < 0x2FF0) || (code > 0x2FFF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsKanbun:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Kanbun UCS Block,
+ * whose inclusive bounds are 0x3190 and 0x319F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsKanbun(int code) {
+    if ((code < 0x3190) || (code > 0x319F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsKangxiRadicals:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the KangxiRadicals UCS Block,
+ * whose inclusive bounds are 0x2F00 and 0x2FDF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsKangxiRadicals(int code) {
+    if ((code < 0x2F00) || (code > 0x2FDF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsKannada:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Kannada UCS Block,
+ * whose inclusive bounds are 0x0C80 and 0x0CFF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsKannada(int code) {
+    if ((code < 0x0C80) || (code > 0x0CFF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsKatakana:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Katakana UCS Block,
+ * whose inclusive bounds are 0x30A0 and 0x30FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsKatakana(int code) {
+    if ((code < 0x30A0) || (code > 0x30FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsKatakanaPhoneticExtensions:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the KatakanaPhoneticExtensions
+ * UCS Block, whose inclusive bounds are 0x31F0 and 0x31FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsKatakanaPhoneticExtensions(int code) {
+    if ((code < 0x31F0) || (code > 0x31FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsKhmer:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Khmer UCS Block,
+ * whose inclusive bounds are 0x1780 and 0x17FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsKhmer(int code) {
+    if ((code < 0x1780) || (code > 0x17FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsKhmerSymbols:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the KhmerSymbols UCS Block,
+ * whose inclusive bounds are 0x19E0 and 0x19FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsKhmerSymbols(int code) {
+    if ((code < 0x19E0) || (code > 0x19FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsLao:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Lao UCS Block,
+ * whose inclusive bounds are 0x0E80 and 0x0EFF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsLao(int code) {
+    if ((code < 0x0E80) || (code > 0x0EFF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsLatin1Supplement:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Latin-1Supplement UCS Block,
+ * whose inclusive bounds are 0x0080 and 0x00FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsLatin1Supplement(int code) {
+    if ((code < 0x0080) || (code > 0x00FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsLatinExtendedA:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the LatinExtended-A UCS Block,
+ * whose inclusive bounds are 0x0100 and 0x017F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsLatinExtendedA(int code) {
+    if ((code < 0x0100) || (code > 0x017F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsLatinExtendedB:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the LatinExtended-B UCS Block,
+ * whose inclusive bounds are 0x0180 and 0x024F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsLatinExtendedB(int code) {
+    if ((code < 0x0180) || (code > 0x024F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsLatinExtendedAdditional:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the LatinExtendedAdditional UCS Block,
+ * whose inclusive bounds are 0x1E00 and 0x1EFF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsLatinExtendedAdditional(int code) {
+    if ((code < 0x1E00) || (code > 0x1EFF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsLetterlikeSymbols:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the LetterlikeSymbols UCS Block,
+ * whose inclusive bounds are 0x2100 and 0x214F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsLetterlikeSymbols(int code) {
+    if ((code < 0x2100) || (code > 0x214F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsLimbu:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Limbu UCS Block,
+ * whose inclusive bounds are 0x1900 and 0x194F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsLimbu(int code) {
+    if ((code < 0x1900) || (code > 0x194F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsLinearBIdeograms:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the LinearBIdeograms UCS Block,
+ * whose inclusive bounds are 0x10080 and 0x100FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsLinearBIdeograms(int code) {
+    if ((code < 0x10080) || (code > 0x100FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsLinearBSyllabary:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the LinearBSyllabary UCS Block,
+ * whose inclusive bounds are 0x10000 and 0x1007F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsLinearBSyllabary(int code) {
+    if ((code < 0x10000) || (code > 0x1007F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsLowSurrogates:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the LowSurrogates UCS Block,
+ * whose inclusive bounds are 0xDC00 and 0xDFFF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsLowSurrogates(int code) {
+    if ((code < 0xDC00) || (code > 0xDFFF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsMalayalam:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Malayalam UCS Block,
+ * whose inclusive bounds are 0x0D00 and 0x0D7F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsMalayalam(int code) {
+    if ((code < 0x0D00) || (code > 0x0D7F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsMathematicalAlphanumericSymbols:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the MathematicalAlphanumericSymbols
+ * UCS Block, whose inclusive bounds are 0x1D400 and 0x1D7FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsMathematicalAlphanumericSymbols(int code) {
+    if ((code < 0x1D400) || (code > 0x1D7FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsMathematicalOperators:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the MathematicalOperators UCS Block,
+ * whose inclusive bounds are 0x2200 and 0x22FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsMathematicalOperators(int code) {
+    if ((code < 0x2200) || (code > 0x22FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsMiscellaneousMathematicalSymbolsA:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the MiscellaneousMathematicalSymbols-A
+ * UCS Block, whose inclusive bounds are 0x27C0 and 0x27EF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsMiscellaneousMathematicalSymbolsA(int code) {
+    if ((code < 0x27C0) || (code > 0x27EF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsMiscellaneousMathematicalSymbolsB:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the MiscellaneousMathematicalSymbols-B
+ * UCS Block, whose inclusive bounds are 0x2980 and 0x29FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsMiscellaneousMathematicalSymbolsB(int code) {
+    if ((code < 0x2980) || (code > 0x29FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsMiscellaneousSymbols:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the MiscellaneousSymbols UCS Block,
+ * whose inclusive bounds are 0x2600 and 0x26FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsMiscellaneousSymbols(int code) {
+    if ((code < 0x2600) || (code > 0x26FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsMiscellaneousSymbolsandArrows:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the MiscellaneousSymbolsandArrows
+ * UCS Block, whose inclusive bounds are 0x2B00 and 0x2BFF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsMiscellaneousSymbolsandArrows(int code) {
+    if ((code < 0x2B00) || (code > 0x2BFF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsMiscellaneousTechnical:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the MiscellaneousTechnical UCS Block,
+ * whose inclusive bounds are 0x2300 and 0x23FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsMiscellaneousTechnical(int code) {
+    if ((code < 0x2300) || (code > 0x23FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsMongolian:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Mongolian UCS Block,
+ * whose inclusive bounds are 0x1800 and 0x18AF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsMongolian(int code) {
+    if ((code < 0x1800) || (code > 0x18AF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsMusicalSymbols:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the MusicalSymbols UCS Block,
+ * whose inclusive bounds are 0x1D100 and 0x1D1FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsMusicalSymbols(int code) {
+    if ((code < 0x1D100) || (code > 0x1D1FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsMyanmar:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Myanmar UCS Block,
+ * whose inclusive bounds are 0x1000 and 0x109F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsMyanmar(int code) {
+    if ((code < 0x1000) || (code > 0x109F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsNumberForms:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the NumberForms UCS Block,
+ * whose inclusive bounds are 0x2150 and 0x218F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsNumberForms(int code) {
+    if ((code < 0x2150) || (code > 0x218F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsOgham:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Ogham UCS Block,
+ * whose inclusive bounds are 0x1680 and 0x169F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsOgham(int code) {
+    if ((code < 0x1680) || (code > 0x169F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsOldItalic:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the OldItalic UCS Block,
+ * whose inclusive bounds are 0x10300 and 0x1032F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsOldItalic(int code) {
+    if ((code < 0x10300) || (code > 0x1032F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsOpticalCharacterRecognition:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the OpticalCharacterRecognition
+ * UCS Block, whose inclusive bounds are 0x2440 and 0x245F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsOpticalCharacterRecognition(int code) {
+    if ((code < 0x2440) || (code > 0x245F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsOriya:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Oriya UCS Block,
+ * whose inclusive bounds are 0x0B00 and 0x0B7F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsOriya(int code) {
+    if ((code < 0x0B00) || (code > 0x0B7F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsOsmanya:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Osmanya UCS Block,
+ * whose inclusive bounds are 0x10480 and 0x104AF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsOsmanya(int code) {
+    if ((code < 0x10480) || (code > 0x104AF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsPhoneticExtensions:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the PhoneticExtensions UCS Block,
+ * whose inclusive bounds are 0x1D00 and 0x1D7F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsPhoneticExtensions(int code) {
+    if ((code < 0x1D00) || (code > 0x1D7F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsPrivateUse:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the PrivateUse UCS Block. Three
+ * inclusive ranges are accepted: 0xE000..0xF8FF, 0xF0000..0xFFFFF and
+ * 0x100000..0x10FFFF.
+ *
+ * Returns 1 when @code is inside any of the ranges, 0 when it is not
+ */
+int
+xmlUCSIsPrivateUse(int code) {
+    if ((code >= 0xE000) && (code <= 0xF8FF))
+        return(1);
+    if ((code >= 0xF0000) && (code <= 0xFFFFF))
+        return(1);
+    if ((code >= 0x100000) && (code <= 0x10FFFF))
+        return(1);
+    return(0);
+}
+
+/**
+ * xmlUCSIsPrivateUseArea:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the PrivateUseArea UCS Block,
+ * whose inclusive bounds are 0xE000 and 0xF8FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsPrivateUseArea(int code) {
+    if ((code < 0xE000) || (code > 0xF8FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsRunic:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Runic UCS Block,
+ * whose inclusive bounds are 0x16A0 and 0x16FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsRunic(int code) {
+    if ((code < 0x16A0) || (code > 0x16FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsShavian:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Shavian UCS Block,
+ * whose inclusive bounds are 0x10450 and 0x1047F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsShavian(int code) {
+    if ((code < 0x10450) || (code > 0x1047F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsSinhala:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Sinhala UCS Block,
+ * whose inclusive bounds are 0x0D80 and 0x0DFF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsSinhala(int code) {
+    if ((code < 0x0D80) || (code > 0x0DFF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsSmallFormVariants:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the SmallFormVariants UCS Block,
+ * whose inclusive bounds are 0xFE50 and 0xFE6F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsSmallFormVariants(int code) {
+    if ((code < 0xFE50) || (code > 0xFE6F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsSpacingModifierLetters:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the SpacingModifierLetters UCS Block,
+ * whose inclusive bounds are 0x02B0 and 0x02FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsSpacingModifierLetters(int code) {
+    if ((code < 0x02B0) || (code > 0x02FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsSpecials:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Specials UCS Block,
+ * whose inclusive bounds are 0xFFF0 and 0xFFFF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsSpecials(int code) {
+    if ((code < 0xFFF0) || (code > 0xFFFF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsSuperscriptsandSubscripts:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the SuperscriptsandSubscripts
+ * UCS Block, whose inclusive bounds are 0x2070 and 0x209F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsSuperscriptsandSubscripts(int code) {
+    if ((code < 0x2070) || (code > 0x209F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsSupplementalArrowsA:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the SupplementalArrows-A UCS Block,
+ * whose inclusive bounds are 0x27F0 and 0x27FF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsSupplementalArrowsA(int code) {
+    if ((code < 0x27F0) || (code > 0x27FF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsSupplementalArrowsB:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the SupplementalArrows-B UCS Block,
+ * whose inclusive bounds are 0x2900 and 0x297F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsSupplementalArrowsB(int code) {
+    if ((code < 0x2900) || (code > 0x297F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsSupplementalMathematicalOperators:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the SupplementalMathematicalOperators
+ * UCS Block, whose inclusive bounds are 0x2A00 and 0x2AFF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsSupplementalMathematicalOperators(int code) {
+    if ((code < 0x2A00) || (code > 0x2AFF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsSupplementaryPrivateUseAreaA:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the SupplementaryPrivateUseArea-A
+ * UCS Block, whose inclusive bounds are 0xF0000 and 0xFFFFF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsSupplementaryPrivateUseAreaA(int code) {
+    if ((code < 0xF0000) || (code > 0xFFFFF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsSupplementaryPrivateUseAreaB:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the SupplementaryPrivateUseArea-B
+ * UCS Block, whose inclusive bounds are 0x100000 and 0x10FFFF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsSupplementaryPrivateUseAreaB(int code) {
+    if ((code < 0x100000) || (code > 0x10FFFF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsSyriac:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Syriac UCS Block,
+ * whose inclusive bounds are 0x0700 and 0x074F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsSyriac(int code) {
+    if ((code < 0x0700) || (code > 0x074F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsTagalog:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Tagalog UCS Block,
+ * whose inclusive bounds are 0x1700 and 0x171F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsTagalog(int code) {
+    if ((code < 0x1700) || (code > 0x171F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsTagbanwa:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Tagbanwa UCS Block,
+ * whose inclusive bounds are 0x1760 and 0x177F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsTagbanwa(int code) {
+    if ((code < 0x1760) || (code > 0x177F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsTags:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Tags UCS Block,
+ * whose inclusive bounds are 0xE0000 and 0xE007F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsTags(int code) {
+    if ((code < 0xE0000) || (code > 0xE007F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsTaiLe:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the TaiLe UCS Block,
+ * whose inclusive bounds are 0x1950 and 0x197F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsTaiLe(int code) {
+    if ((code < 0x1950) || (code > 0x197F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsTaiXuanJingSymbols:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the TaiXuanJingSymbols UCS Block,
+ * whose inclusive bounds are 0x1D300 and 0x1D35F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsTaiXuanJingSymbols(int code) {
+    if ((code < 0x1D300) || (code > 0x1D35F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsTamil:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Tamil UCS Block,
+ * whose inclusive bounds are 0x0B80 and 0x0BFF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsTamil(int code) {
+    if ((code < 0x0B80) || (code > 0x0BFF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsTelugu:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Telugu UCS Block,
+ * whose inclusive bounds are 0x0C00 and 0x0C7F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsTelugu(int code) {
+    if ((code < 0x0C00) || (code > 0x0C7F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsThaana:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Thaana UCS Block,
+ * whose inclusive bounds are 0x0780 and 0x07BF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsThaana(int code) {
+    if ((code < 0x0780) || (code > 0x07BF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsThai:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Thai UCS Block,
+ * whose inclusive bounds are 0x0E00 and 0x0E7F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsThai(int code) {
+    if ((code < 0x0E00) || (code > 0x0E7F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsTibetan:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Tibetan UCS Block,
+ * whose inclusive bounds are 0x0F00 and 0x0FFF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsTibetan(int code) {
+    if ((code < 0x0F00) || (code > 0x0FFF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsUgaritic:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the Ugaritic UCS Block,
+ * whose inclusive bounds are 0x10380 and 0x1039F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsUgaritic(int code) {
+    if ((code < 0x10380) || (code > 0x1039F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsUnifiedCanadianAboriginalSyllabics:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the UnifiedCanadianAboriginalSyllabics
+ * UCS Block, whose inclusive bounds are 0x1400 and 0x167F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsUnifiedCanadianAboriginalSyllabics(int code) {
+    if ((code < 0x1400) || (code > 0x167F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsVariationSelectors:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the VariationSelectors UCS Block,
+ * whose inclusive bounds are 0xFE00 and 0xFE0F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsVariationSelectors(int code) {
+    if ((code < 0xFE00) || (code > 0xFE0F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsVariationSelectorsSupplement:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the VariationSelectorsSupplement
+ * UCS Block, whose inclusive bounds are 0xE0100 and 0xE01EF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsVariationSelectorsSupplement(int code) {
+    if ((code < 0xE0100) || (code > 0xE01EF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsYiRadicals:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the YiRadicals UCS Block,
+ * whose inclusive bounds are 0xA490 and 0xA4CF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsYiRadicals(int code) {
+    if ((code < 0xA490) || (code > 0xA4CF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsYiSyllables:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the YiSyllables UCS Block,
+ * whose inclusive bounds are 0xA000 and 0xA48F.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsYiSyllables(int code) {
+    if ((code < 0xA000) || (code > 0xA48F))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsYijingHexagramSymbols:
+ * @code: UCS code point to classify
+ *
+ * Reports whether @code belongs to the YijingHexagramSymbols UCS Block,
+ * whose inclusive bounds are 0x4DC0 and 0x4DFF.
+ *
+ * Returns 1 when @code is inside the block, 0 when it is not
+ */
+int
+xmlUCSIsYijingHexagramSymbols(int code) {
+    if ((code < 0x4DC0) || (code > 0x4DFF))
+        return(0);
+    return(1);
+}
+
+/**
+ * xmlUCSIsBlock:
+ * @code: UCS code point to classify
+ * @block: name of the UCS block to test against
+ *
+ * Resolves @block to its predicate through xmlUnicodeLookup() on
+ * xmlUnicodeBlockTbl and, when one is found, applies it to @code.
+ *
+ * Returns the predicate's result (1 if true, 0 if false), or -1 when
+ * the lookup yields no predicate for @block
+ */
+int
+xmlUCSIsBlock(int code, const char *block) {
+    xmlIntFunc *checker = xmlUnicodeLookup(&xmlUnicodeBlockTbl, block);
+
+    return((checker != NULL) ? checker(code) : -1);
+}
+
+/**
+ * xmlUCSIsCatC:
+ * @code: UCS code point to classify
+ *
+ * Membership test for the C UCS Category. The lookup is delegated to
+ * xmlCharInRange(), which receives @code converted to unsigned int
+ * together with the xmlCG range group.
+ *
+ * Returns 1 if true 0 otherwise (the result of xmlCharInRange())
+ */
+int
+xmlUCSIsCatC(int code) {
+    const unsigned int ucs = (unsigned int) code;
+
+    return(xmlCharInRange(ucs, &xmlCG));
+}
+
+/**
+ * xmlUCSIsCatCc:
+ * @code: UCS code point to classify
+ *
+ * Membership test for the Cc UCS Category, implemented as two inclusive
+ * ranges: 0x0..0x1f and 0x7f..0x9f.
+ *
+ * Returns 1 when @code is in either range, 0 when it is not
+ */
+int
+xmlUCSIsCatCc(int code) {
+    if ((code >= 0x0) && (code <= 0x1f))
+        return(1);
+    if ((code >= 0x7f) && (code <= 0x9f))
+        return(1);
+    return(0);
+}
+
+/**
+ * xmlUCSIsCatCf:
+ * @code: UCS code point to classify
+ *
+ * Membership test for the Cf UCS Category. The lookup is delegated to
+ * xmlCharInRange(), which receives @code converted to unsigned int
+ * together with the xmlCfG range group.
+ *
+ * Returns 1 if true 0 otherwise (the result of xmlCharInRange())
+ */
+int
+xmlUCSIsCatCf(int code) {
+    const unsigned int ucs = (unsigned int) code;
+
+    return(xmlCharInRange(ucs, &xmlCfG));
+}
+
+/**
+ * xmlUCSIsCatCo:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Co (private use) UCS Category.
+ *
+ * The category is made of three contiguous ranges: 0xe000..0xf8ff,
+ * 0xf0000..0xffffd and 0x100000..0x10fffd. UnicodeData.txt lists each
+ * of them only through a "First"/"Last" pair of entries, so the test
+ * must cover everything between the two end points; matching the six
+ * end points alone would reject nearly every private use character.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatCo(int code) {
+    return(((code >= 0xe000) && (code <= 0xf8ff)) ||
+           ((code >= 0xf0000) && (code <= 0xffffd)) ||
+           ((code >= 0x100000) && (code <= 0x10fffd)));
+}
+
+/**
+ * xmlUCSIsCatCs:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Cs (surrogate) UCS Category.
+ *
+ * UnicodeData.txt describes the surrogates through "First"/"Last"
+ * pairs (0xd800/0xdb7f, 0xdb80/0xdbff, 0xdc00/0xdfff). Those three
+ * ranges are adjacent, so together they cover 0xd800..0xdfff without
+ * a gap; every code point in that span is accepted, not only the
+ * listed end points.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatCs(int code) {
+    return((code >= 0xd800) && (code <= 0xdfff));
+}
+
+/**
+ * xmlUCSIsCatL:
+ * @code: UCS code point to classify
+ *
+ * Membership test for the L UCS Category. The lookup is delegated to
+ * xmlCharInRange(), which receives @code converted to unsigned int
+ * together with the xmlLG range group.
+ *
+ * Returns 1 if true 0 otherwise (the result of xmlCharInRange())
+ */
+int
+xmlUCSIsCatL(int code) {
+    const unsigned int ucs = (unsigned int) code;
+
+    return(xmlCharInRange(ucs, &xmlLG));
+}
+
+/**
+ * xmlUCSIsCatLl:
+ * @code: UCS code point to classify
+ *
+ * Membership test for the Ll UCS Category. The lookup is delegated to
+ * xmlCharInRange(), which receives @code converted to unsigned int
+ * together with the xmlLlG range group.
+ *
+ * Returns 1 if true 0 otherwise (the result of xmlCharInRange())
+ */
+int
+xmlUCSIsCatLl(int code) {
+    const unsigned int ucs = (unsigned int) code;
+
+    return(xmlCharInRange(ucs, &xmlLlG));
+}
+
+/**
+ * xmlUCSIsCatLm:
+ * @code: UCS code point to classify
+ *
+ * Membership test for the Lm UCS Category. The lookup is delegated to
+ * xmlCharInRange(), which receives @code converted to unsigned int
+ * together with the xmlLmG range group.
+ *
+ * Returns 1 if true 0 otherwise (the result of xmlCharInRange())
+ */
+int
+xmlUCSIsCatLm(int code) {
+    const unsigned int ucs = (unsigned int) code;
+
+    return(xmlCharInRange(ucs, &xmlLmG));
+}
+
+/**
+ * xmlUCSIsCatLo:
+ * @code: UCS code point to classify
+ *
+ * Membership test for the Lo UCS Category. The lookup is delegated to
+ * xmlCharInRange(), which receives @code converted to unsigned int
+ * together with the xmlLoG range group.
+ *
+ * Returns 1 if true 0 otherwise (the result of xmlCharInRange())
+ */
+int
+xmlUCSIsCatLo(int code) {
+    const unsigned int ucs = (unsigned int) code;
+
+    return(xmlCharInRange(ucs, &xmlLoG));
+}
+
+/**
+ * xmlUCSIsCatLt:
+ * @code: UCS code point to classify
+ *
+ * Membership test for the Lt UCS Category. The lookup is delegated to
+ * xmlCharInRange(), which receives @code converted to unsigned int
+ * together with the xmlLtG range group.
+ *
+ * Returns 1 if true 0 otherwise (the result of xmlCharInRange())
+ */
+int
+xmlUCSIsCatLt(int code) {
+    const unsigned int ucs = (unsigned int) code;
+
+    return(xmlCharInRange(ucs, &xmlLtG));
+}
+
+/**
+ * xmlUCSIsCatLu:
+ * @code: UCS code point to classify
+ *
+ * Membership test for the Lu UCS Category. The lookup is delegated to
+ * xmlCharInRange(), which receives @code converted to unsigned int
+ * together with the xmlLuG range group.
+ *
+ * Returns 1 if true 0 otherwise (the result of xmlCharInRange())
+ */
+int
+xmlUCSIsCatLu(int code) {
+    const unsigned int ucs = (unsigned int) code;
+
+    return(xmlCharInRange(ucs, &xmlLuG));
+}
+
+/**
+ * xmlUCSIsCatM:
+ * @code: UCS code point to classify
+ *
+ * Membership test for the M UCS Category. The lookup is delegated to
+ * xmlCharInRange(), which receives @code converted to unsigned int
+ * together with the xmlMG range group.
+ *
+ * Returns 1 if true 0 otherwise (the result of xmlCharInRange())
+ */
+int
+xmlUCSIsCatM(int code) {
+    const unsigned int ucs = (unsigned int) code;
+
+    return(xmlCharInRange(ucs, &xmlMG));
+}
+
+/**
+ * xmlUCSIsCatMc:
+ * @code: UCS code point to classify
+ *
+ * Membership test for the Mc UCS Category. The lookup is delegated to
+ * xmlCharInRange(), which receives @code converted to unsigned int
+ * together with the xmlMcG range group.
+ *
+ * Returns 1 if true 0 otherwise (the result of xmlCharInRange())
+ */
+int
+xmlUCSIsCatMc(int code) {
+    const unsigned int ucs = (unsigned int) code;
+
+    return(xmlCharInRange(ucs, &xmlMcG));
+}
+
+/**
+ * xmlUCSIsCatMe:
+ * @code: UCS code point to classify
+ *
+ * Membership test for the Me UCS Category. The accepted code points are
+ * 0x488..0x489, 0x6de, 0x20dd..0x20e0 and 0x20e2..0x20e4; 0x20e1 is
+ * deliberately absent from the list.
+ *
+ * Returns 1 when @code is one of the listed code points, 0 otherwise
+ */
+int
+xmlUCSIsCatMe(int code) {
+    switch (code) {
+        case 0x488: case 0x489:
+        case 0x6de:
+        case 0x20dd: case 0x20de: case 0x20df: case 0x20e0:
+        case 0x20e2: case 0x20e3: case 0x20e4:
+            return(1);
+        default:
+            return(0);
+    }
+}
+
+/**
+ * xmlUCSIsCatMn:
+ * @code: UCS code point to classify
+ *
+ * Membership test for the Mn UCS Category. The lookup is delegated to
+ * xmlCharInRange(), which receives @code converted to unsigned int
+ * together with the xmlMnG range group.
+ *
+ * Returns 1 if true 0 otherwise (the result of xmlCharInRange())
+ */
+int
+xmlUCSIsCatMn(int code) {
+    const unsigned int ucs = (unsigned int) code;
+
+    return(xmlCharInRange(ucs, &xmlMnG));
+}
+
+/**
+ * xmlUCSIsCatN:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of N UCS Category
+ *
+ * Implemented as a single call to xmlCharInRange(), passing @code cast
+ * to unsigned int and the address of xmlNG.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatN(int code) {
+ return(xmlCharInRange((unsigned int)code, &xmlNG));
+}
+
+/**
+ * xmlUCSIsCatNd:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Nd UCS Category
+ *
+ * Implemented as a single call to xmlCharInRange(), passing @code cast
+ * to unsigned int and the address of xmlNdG.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatNd(int code) {
+ return(xmlCharInRange((unsigned int)code, &xmlNdG));
+}
+
+/**
+ * xmlUCSIsCatNl:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Nl UCS Category
+ *
+ * The test is written out inline, without a call to xmlCharInRange():
+ * it accepts 0x16ee-0x16f0, 0x2160-0x2183, 0x3007, 0x3021-0x3029,
+ * 0x3038-0x303a and 0x1034a.
+ * @code is compared as a signed int, so a negative value never matches.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatNl(int code) {
+ return(((code >= 0x16ee) && (code <= 0x16f0)) ||
+ ((code >= 0x2160) && (code <= 0x2183)) ||
+ (code == 0x3007) ||
+ ((code >= 0x3021) && (code <= 0x3029)) ||
+ ((code >= 0x3038) && (code <= 0x303a)) ||
+ (code == 0x1034a));
+}
+
+/**
+ * xmlUCSIsCatNo:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of No UCS Category
+ *
+ * Implemented as a single call to xmlCharInRange(), passing @code cast
+ * to unsigned int and the address of xmlNoG.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatNo(int code) {
+ return(xmlCharInRange((unsigned int)code, &xmlNoG));
+}
+
+/**
+ * xmlUCSIsCatP:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of P UCS Category
+ *
+ * Implemented as a single call to xmlCharInRange(), passing @code cast
+ * to unsigned int and the address of xmlPG.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatP(int code) {
+ return(xmlCharInRange((unsigned int)code, &xmlPG));
+}
+
+/**
+ * xmlUCSIsCatPc:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Pc UCS Category
+ *
+ * The test is written out inline, without a call to xmlCharInRange():
+ * it accepts 0x5f, 0x203f-0x2040, 0x2054, 0x30fb, 0xfe33-0xfe34,
+ * 0xfe4d-0xfe4f, 0xff3f and 0xff65.
+ * @code is compared as a signed int, so a negative value never matches.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatPc(int code) {
+ return((code == 0x5f) ||
+ ((code >= 0x203f) && (code <= 0x2040)) ||
+ (code == 0x2054) ||
+ (code == 0x30fb) ||
+ ((code >= 0xfe33) && (code <= 0xfe34)) ||
+ ((code >= 0xfe4d) && (code <= 0xfe4f)) ||
+ (code == 0xff3f) ||
+ (code == 0xff65));
+}
+
+/**
+ * xmlUCSIsCatPd:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Pd UCS Category
+ *
+ * Implemented as a single call to xmlCharInRange(), passing @code cast
+ * to unsigned int and the address of xmlPdG.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatPd(int code) {
+ return(xmlCharInRange((unsigned int)code, &xmlPdG));
+}
+
+/**
+ * xmlUCSIsCatPe:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Pe UCS Category
+ *
+ * Implemented as a single call to xmlCharInRange(), passing @code cast
+ * to unsigned int and the address of xmlPeG.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatPe(int code) {
+ return(xmlCharInRange((unsigned int)code, &xmlPeG));
+}
+
+/**
+ * xmlUCSIsCatPf:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Pf UCS Category
+ *
+ * The test is written out inline, without a call to xmlCharInRange():
+ * it accepts exactly 0xbb, 0x2019, 0x201d and 0x203a.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatPf(int code) {
+ return((code == 0xbb) ||
+ (code == 0x2019) ||
+ (code == 0x201d) ||
+ (code == 0x203a));
+}
+
+/**
+ * xmlUCSIsCatPi:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Pi UCS Category
+ *
+ * The test is written out inline, without a call to xmlCharInRange():
+ * it accepts 0xab, 0x2018, 0x201b-0x201c, 0x201f and 0x2039.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatPi(int code) {
+ return((code == 0xab) ||
+ (code == 0x2018) ||
+ ((code >= 0x201b) && (code <= 0x201c)) ||
+ (code == 0x201f) ||
+ (code == 0x2039));
+}
+
+/**
+ * xmlUCSIsCatPo:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Po UCS Category
+ *
+ * Implemented as a single call to xmlCharInRange(), passing @code cast
+ * to unsigned int and the address of xmlPoG.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatPo(int code) {
+ return(xmlCharInRange((unsigned int)code, &xmlPoG));
+}
+
+/**
+ * xmlUCSIsCatPs:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Ps UCS Category
+ *
+ * Implemented as a single call to xmlCharInRange(), passing @code cast
+ * to unsigned int and the address of xmlPsG.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatPs(int code) {
+ return(xmlCharInRange((unsigned int)code, &xmlPsG));
+}
+
+/**
+ * xmlUCSIsCatS:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of S UCS Category
+ *
+ * Implemented as a single call to xmlCharInRange(), passing @code cast
+ * to unsigned int and the address of xmlSG.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatS(int code) {
+ return(xmlCharInRange((unsigned int)code, &xmlSG));
+}
+
+/**
+ * xmlUCSIsCatSc:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Sc UCS Category
+ *
+ * Implemented as a single call to xmlCharInRange(), passing @code cast
+ * to unsigned int and the address of xmlScG.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatSc(int code) {
+ return(xmlCharInRange((unsigned int)code, &xmlScG));
+}
+
+/**
+ * xmlUCSIsCatSk:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Sk UCS Category
+ *
+ * Implemented as a single call to xmlCharInRange(), passing @code cast
+ * to unsigned int and the address of xmlSkG.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatSk(int code) {
+ return(xmlCharInRange((unsigned int)code, &xmlSkG));
+}
+
+/**
+ * xmlUCSIsCatSm:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Sm UCS Category
+ *
+ * Implemented as a single call to xmlCharInRange(), passing @code cast
+ * to unsigned int and the address of xmlSmG.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatSm(int code) {
+ return(xmlCharInRange((unsigned int)code, &xmlSmG));
+}
+
+/**
+ * xmlUCSIsCatSo:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of So UCS Category
+ *
+ * Implemented as a single call to xmlCharInRange(), passing @code cast
+ * to unsigned int and the address of xmlSoG.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatSo(int code) {
+ return(xmlCharInRange((unsigned int)code, &xmlSoG));
+}
+
+/**
+ * xmlUCSIsCatZ:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Z UCS Category
+ *
+ * Implemented as a single call to xmlCharInRange(), passing @code cast
+ * to unsigned int and the address of xmlZG.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatZ(int code) {
+ return(xmlCharInRange((unsigned int)code, &xmlZG));
+}
+
+/**
+ * xmlUCSIsCatZl:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Zl UCS Category
+ *
+ * The test is a single inline comparison: only the code point 0x2028
+ * is accepted.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatZl(int code) {
+ return((code == 0x2028));
+}
+
+/**
+ * xmlUCSIsCatZp:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Zp UCS Category
+ *
+ * The test is a single inline comparison: only the code point 0x2029
+ * is accepted.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatZp(int code) {
+ return((code == 0x2029));
+}
+
+/**
+ * xmlUCSIsCatZs:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Zs UCS Category
+ *
+ * The test is written out inline, without a call to xmlCharInRange():
+ * it accepts 0x20, 0xa0, 0x1680, 0x180e, 0x2000-0x200a, 0x202f, 0x205f
+ * and 0x3000.
+ * @code is compared as a signed int, so a negative value never matches.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatZs(int code) {
+ return((code == 0x20) ||
+ (code == 0xa0) ||
+ (code == 0x1680) ||
+ (code == 0x180e) ||
+ ((code >= 0x2000) && (code <= 0x200a)) ||
+ (code == 0x202f) ||
+ (code == 0x205f) ||
+ (code == 0x3000));
+}
+
+/**
+ * xmlUCSIsCat:
+ * @code: UCS code point
+ * @cat: UCS Category name
+ *
+ * Check whether the character is part of the UCS Category
+ *
+ * The checker function for @cat is obtained from xmlUnicodeCatTbl via
+ * xmlUnicodeLookup() and then called with @code. When the lookup yields
+ * NULL no checker is called and -1 is returned instead.
+ *
+ * Returns 1 if true, 0 if false and -1 on unknown category
+ */
+int
+xmlUCSIsCat(int code, const char *cat) {
+ xmlIntFunc *func;
+
+ func = xmlUnicodeLookup(&xmlUnicodeCatTbl, cat);
+ if (func == NULL)
+ return (-1);
+ return (func(code));
+}
+
+#include <xsde/c/post.h>
diff --git a/libxsde/xsde/c/regexp/xmlunicode.h b/libxsde/xsde/c/regexp/xmlunicode.h
new file mode 100644
index 0000000..e9bea46
--- /dev/null
+++ b/libxsde/xsde/c/regexp/xmlunicode.h
@@ -0,0 +1,195 @@
+/*
+ * Summary: Unicode character APIs
+ * Description: API for testing whether a UCS code point belongs to a Unicode block or category
+ *
+ * This file is automatically generated from the
+ * UCS description files of the Unicode Character Database
+ * http://www.unicode.org/Public/4.0-Update1/UCD-4.0.1.html
+ * using the genUnicode.py Python script.
+ *
+ * Generation date: Mon Mar 27 11:09:52 2006
+ * Sources: Blocks-4.0.1.txt UnicodeData-4.0.1.txt
+ */
+
+#ifndef __XML_UNICODE_H__
+#define __XML_UNICODE_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int xmlUCSIsAegeanNumbers (int code);
+int xmlUCSIsAlphabeticPresentationForms (int code);
+int xmlUCSIsArabic (int code);
+int xmlUCSIsArabicPresentationFormsA (int code);
+int xmlUCSIsArabicPresentationFormsB (int code);
+int xmlUCSIsArmenian (int code);
+int xmlUCSIsArrows (int code);
+int xmlUCSIsBasicLatin (int code);
+int xmlUCSIsBengali (int code);
+int xmlUCSIsBlockElements (int code);
+int xmlUCSIsBopomofo (int code);
+int xmlUCSIsBopomofoExtended (int code);
+int xmlUCSIsBoxDrawing (int code);
+int xmlUCSIsBraillePatterns (int code);
+int xmlUCSIsBuhid (int code);
+int xmlUCSIsByzantineMusicalSymbols (int code);
+int xmlUCSIsCJKCompatibility (int code);
+int xmlUCSIsCJKCompatibilityForms (int code);
+int xmlUCSIsCJKCompatibilityIdeographs (int code);
+int xmlUCSIsCJKCompatibilityIdeographsSupplement (int code);
+int xmlUCSIsCJKRadicalsSupplement (int code);
+int xmlUCSIsCJKSymbolsandPunctuation (int code);
+int xmlUCSIsCJKUnifiedIdeographs (int code);
+int xmlUCSIsCJKUnifiedIdeographsExtensionA (int code);
+int xmlUCSIsCJKUnifiedIdeographsExtensionB (int code);
+int xmlUCSIsCherokee (int code);
+int xmlUCSIsCombiningDiacriticalMarks (int code);
+int xmlUCSIsCombiningDiacriticalMarksforSymbols (int code);
+int xmlUCSIsCombiningHalfMarks (int code);
+int xmlUCSIsCombiningMarksforSymbols (int code);
+int xmlUCSIsControlPictures (int code);
+int xmlUCSIsCurrencySymbols (int code);
+int xmlUCSIsCypriotSyllabary (int code);
+int xmlUCSIsCyrillic (int code);
+int xmlUCSIsCyrillicSupplement (int code);
+int xmlUCSIsDeseret (int code);
+int xmlUCSIsDevanagari (int code);
+int xmlUCSIsDingbats (int code);
+int xmlUCSIsEnclosedAlphanumerics (int code);
+int xmlUCSIsEnclosedCJKLettersandMonths (int code);
+int xmlUCSIsEthiopic (int code);
+int xmlUCSIsGeneralPunctuation (int code);
+int xmlUCSIsGeometricShapes (int code);
+int xmlUCSIsGeorgian (int code);
+int xmlUCSIsGothic (int code);
+int xmlUCSIsGreek (int code);
+int xmlUCSIsGreekExtended (int code);
+int xmlUCSIsGreekandCoptic (int code);
+int xmlUCSIsGujarati (int code);
+int xmlUCSIsGurmukhi (int code);
+int xmlUCSIsHalfwidthandFullwidthForms (int code);
+int xmlUCSIsHangulCompatibilityJamo (int code);
+int xmlUCSIsHangulJamo (int code);
+int xmlUCSIsHangulSyllables (int code);
+int xmlUCSIsHanunoo (int code);
+int xmlUCSIsHebrew (int code);
+int xmlUCSIsHighPrivateUseSurrogates (int code);
+int xmlUCSIsHighSurrogates (int code);
+int xmlUCSIsHiragana (int code);
+int xmlUCSIsIPAExtensions (int code);
+int xmlUCSIsIdeographicDescriptionCharacters (int code);
+int xmlUCSIsKanbun (int code);
+int xmlUCSIsKangxiRadicals (int code);
+int xmlUCSIsKannada (int code);
+int xmlUCSIsKatakana (int code);
+int xmlUCSIsKatakanaPhoneticExtensions (int code);
+int xmlUCSIsKhmer (int code);
+int xmlUCSIsKhmerSymbols (int code);
+int xmlUCSIsLao (int code);
+int xmlUCSIsLatin1Supplement (int code);
+int xmlUCSIsLatinExtendedA (int code);
+int xmlUCSIsLatinExtendedB (int code);
+int xmlUCSIsLatinExtendedAdditional (int code);
+int xmlUCSIsLetterlikeSymbols (int code);
+int xmlUCSIsLimbu (int code);
+int xmlUCSIsLinearBIdeograms (int code);
+int xmlUCSIsLinearBSyllabary (int code);
+int xmlUCSIsLowSurrogates (int code);
+int xmlUCSIsMalayalam (int code);
+int xmlUCSIsMathematicalAlphanumericSymbols (int code);
+int xmlUCSIsMathematicalOperators (int code);
+int xmlUCSIsMiscellaneousMathematicalSymbolsA (int code);
+int xmlUCSIsMiscellaneousMathematicalSymbolsB (int code);
+int xmlUCSIsMiscellaneousSymbols (int code);
+int xmlUCSIsMiscellaneousSymbolsandArrows (int code);
+int xmlUCSIsMiscellaneousTechnical (int code);
+int xmlUCSIsMongolian (int code);
+int xmlUCSIsMusicalSymbols (int code);
+int xmlUCSIsMyanmar (int code);
+int xmlUCSIsNumberForms (int code);
+int xmlUCSIsOgham (int code);
+int xmlUCSIsOldItalic (int code);
+int xmlUCSIsOpticalCharacterRecognition (int code);
+int xmlUCSIsOriya (int code);
+int xmlUCSIsOsmanya (int code);
+int xmlUCSIsPhoneticExtensions (int code);
+int xmlUCSIsPrivateUse (int code);
+int xmlUCSIsPrivateUseArea (int code);
+int xmlUCSIsRunic (int code);
+int xmlUCSIsShavian (int code);
+int xmlUCSIsSinhala (int code);
+int xmlUCSIsSmallFormVariants (int code);
+int xmlUCSIsSpacingModifierLetters (int code);
+int xmlUCSIsSpecials (int code);
+int xmlUCSIsSuperscriptsandSubscripts (int code);
+int xmlUCSIsSupplementalArrowsA (int code);
+int xmlUCSIsSupplementalArrowsB (int code);
+int xmlUCSIsSupplementalMathematicalOperators (int code);
+int xmlUCSIsSupplementaryPrivateUseAreaA (int code);
+int xmlUCSIsSupplementaryPrivateUseAreaB (int code);
+int xmlUCSIsSyriac (int code);
+int xmlUCSIsTagalog (int code);
+int xmlUCSIsTagbanwa (int code);
+int xmlUCSIsTags (int code);
+int xmlUCSIsTaiLe (int code);
+int xmlUCSIsTaiXuanJingSymbols (int code);
+int xmlUCSIsTamil (int code);
+int xmlUCSIsTelugu (int code);
+int xmlUCSIsThaana (int code);
+int xmlUCSIsThai (int code);
+int xmlUCSIsTibetan (int code);
+int xmlUCSIsUgaritic (int code);
+int xmlUCSIsUnifiedCanadianAboriginalSyllabics (int code);
+int xmlUCSIsVariationSelectors (int code);
+int xmlUCSIsVariationSelectorsSupplement (int code);
+int xmlUCSIsYiRadicals (int code);
+int xmlUCSIsYiSyllables (int code);
+int xmlUCSIsYijingHexagramSymbols (int code);
+
+int xmlUCSIsBlock (int code, const char *block);
+
+int xmlUCSIsCatC (int code);
+int xmlUCSIsCatCc (int code);
+int xmlUCSIsCatCf (int code);
+int xmlUCSIsCatCo (int code);
+int xmlUCSIsCatCs (int code);
+int xmlUCSIsCatL (int code);
+int xmlUCSIsCatLl (int code);
+int xmlUCSIsCatLm (int code);
+int xmlUCSIsCatLo (int code);
+int xmlUCSIsCatLt (int code);
+int xmlUCSIsCatLu (int code);
+int xmlUCSIsCatM (int code);
+int xmlUCSIsCatMc (int code);
+int xmlUCSIsCatMe (int code);
+int xmlUCSIsCatMn (int code);
+int xmlUCSIsCatN (int code);
+int xmlUCSIsCatNd (int code);
+int xmlUCSIsCatNl (int code);
+int xmlUCSIsCatNo (int code);
+int xmlUCSIsCatP (int code);
+int xmlUCSIsCatPc (int code);
+int xmlUCSIsCatPd (int code);
+int xmlUCSIsCatPe (int code);
+int xmlUCSIsCatPf (int code);
+int xmlUCSIsCatPi (int code);
+int xmlUCSIsCatPo (int code);
+int xmlUCSIsCatPs (int code);
+int xmlUCSIsCatS (int code);
+int xmlUCSIsCatSc (int code);
+int xmlUCSIsCatSk (int code);
+int xmlUCSIsCatSm (int code);
+int xmlUCSIsCatSo (int code);
+int xmlUCSIsCatZ (int code);
+int xmlUCSIsCatZl (int code);
+int xmlUCSIsCatZp (int code);
+int xmlUCSIsCatZs (int code);
+
+int xmlUCSIsCat (int code, const char *cat);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __XML_UNICODE_H__ */
diff --git a/libxsde/xsde/cxx/parser/validating/string-common.cxx b/libxsde/xsde/cxx/parser/validating/string-common.cxx
index 9558e30..1e3d8ba 100644
--- a/libxsde/xsde/cxx/parser/validating/string-common.cxx
+++ b/libxsde/xsde/cxx/parser/validating/string-common.cxx
@@ -3,6 +3,14 @@
// copyright : Copyright (c) 2005-2010 Code Synthesis Tools CC
// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
+#include <xsde/cxx/config.hxx>
+
+#ifdef XSDE_REGEXP
+#ifdef XSDE_EXCEPTIONS
+# include <new> // std::bad_alloc
+#endif
+#endif
+
#include <xsde/cxx/string-search.hxx>
#include <xsde/cxx/parser/validating/string-common.hxx>
@@ -116,6 +124,41 @@ namespace xsde
}
}
+#ifdef XSDE_REGEXP // Pattern facet check; compiled in only when XSDE_REGEXP is defined.
+ if (f.pattern_set_ != 0) // Non-zero means a pattern was set (1 - string, 2 - compiled).
+ {
+ if (f.pattern_set_ == 1) // Still holding the pattern string: compile it on first use.
+ {
+ xmlRegexpPtr r = xmlRegexpCompile (
+ reinterpret_cast<const xmlChar*> (f.pattern_.str));
+
+ if (r == 0) // Every compilation failure is reported as an out-of-memory condition.
+ {
+#ifdef XSDE_EXCEPTIONS
+ throw std::bad_alloc ();
+#else
+ ctx.sys_error (sys_error::no_memory);
+ return false;
+#endif
+
+ }
+
+ string_facets::facets& t =
+ const_cast<string_facets::facets&> (f); // Constness is cast away so the compiled form can be cached in f.
+
+ t.pattern_.regexp = r; // Replaces the str member of the pattern_ union.
+ t.pattern_set_ = 2; // 2 - compiled, so later calls skip the compilation step.
+ }
+
+ if (xmlRegexpExec (
+ f.pattern_.regexp,
+ reinterpret_cast<const xmlChar*> (s)) != 1) // Any result other than 1 is treated as a mismatch.
+ {
+ ctx.schema_error (schema_error::value_pattern_mismatch);
+ return false;
+ }
+ }
+#endif
return true;
}
}
diff --git a/libxsde/xsde/cxx/parser/validating/xml-schema-pskel.hxx b/libxsde/xsde/cxx/parser/validating/xml-schema-pskel.hxx
index 1ef1005..8823846 100644
--- a/libxsde/xsde/cxx/parser/validating/xml-schema-pskel.hxx
+++ b/libxsde/xsde/cxx/parser/validating/xml-schema-pskel.hxx
@@ -12,6 +12,10 @@
# include <string>
#endif
+#ifdef XSDE_REGEXP
+# include <xsde/c/regexp/xmlregexp.h>
+#endif
+
#include <xsde/cxx/parser/xml-schema.hxx>
#include <xsde/cxx/parser/validating/parser.hxx>
@@ -662,6 +666,9 @@ namespace xsde
struct string_facets
{
string_facets ();
+#ifdef XSDE_REGEXP
+ ~string_facets ();
+#endif
void
_length_facet (size_t);
@@ -678,6 +685,9 @@ namespace xsde
void
_enumeration_facet (const char* const*, size_t count);
+ void
+ _pattern_facet (const char*);
+
public:
struct facets
{
@@ -688,10 +698,24 @@ namespace xsde
const char* const* enum_;
size_t enum_count_;
+#ifdef XSDE_REGEXP
+ union
+ {
+ const char* str;
+ xmlRegexpPtr regexp;
+ } pattern_;
+#endif
unsigned int length_set_ : 1;
unsigned int min_length_set_ : 1;
unsigned int max_length_set_ : 1;
+#ifdef XSDE_REGEXP
+ // 0 - not set
+ // 1 - string
+ // 2 - compiled
+ //
+ unsigned int pattern_set_: 2;
+#endif
// 0 - preserve
// 1 - replace
// 2 - collapse
diff --git a/libxsde/xsde/cxx/parser/validating/xml-schema-pskel.ixx b/libxsde/xsde/cxx/parser/validating/xml-schema-pskel.ixx
index 0a8f99a..a9d01f1 100644
--- a/libxsde/xsde/cxx/parser/validating/xml-schema-pskel.ixx
+++ b/libxsde/xsde/cxx/parser/validating/xml-schema-pskel.ixx
@@ -458,7 +458,20 @@ namespace xsde
facets_.enum_ = 0;
facets_.enum_count_ = 0;
+
+#ifdef XSDE_REGEXP
+ facets_.pattern_set_ = 0;
+#endif
+ }
+
+#ifdef XSDE_REGEXP // The destructor is defined only when XSDE_REGEXP is defined.
+ inline string_facets::
+ ~string_facets () // NOTE(review): no copy constructor/assignment is visible here; copying an object with a compiled pattern would free the regexp twice - confirm copies are not made.
+ {
+ if (facets_.pattern_set_ == 2) // 2 - compiled: the pattern_ union holds a regexp, not the pattern string.
+ xmlRegFreeRegexp (facets_.pattern_.regexp); // Release the compiled regexp.
}
+#endif
inline void string_facets::
_length_facet (size_t v)
@@ -494,6 +507,20 @@ namespace xsde
facets_.enum_count_ = count;
}
+#ifndef XSDE_REGEXP // Without XSDE_REGEXP the pattern facet is accepted and ignored.
+ inline void string_facets::
+ _pattern_facet (const char*)
+ {
+ }
+#else // With XSDE_REGEXP the pattern string is recorded in facets_.
+ inline void string_facets::
+ _pattern_facet (const char* s)
+ {
+ facets_.pattern_.str = s; // Only the pointer is stored; the string itself is not copied. NOTE(review): a regexp already held in this union (pattern_set_ == 2) would be overwritten without being freed - confirm the facet is set only once.
+ facets_.pattern_set_ = 1; // 1 - string (not yet compiled).
+ }
+#endif
+
// string_pskel
//
#ifdef XSDE_REUSE_STYLE_TIEIN
diff --git a/libxsde/xsde/cxx/schema-error.cxx b/libxsde/xsde/cxx/schema-error.cxx
index a37ea28..aee2642 100644
--- a/libxsde/xsde/cxx/schema-error.cxx
+++ b/libxsde/xsde/cxx/schema-error.cxx
@@ -59,6 +59,7 @@ namespace xsde
"value is greater than maximum allowed",
"value is less than minimum allowed",
"value is not in enumeration",
+ "value does not match pattern",
"length is greater than maximum allowed",
"length is less than minimum allowed",
"length is not equal to prescribed length",
diff --git a/libxsde/xsde/cxx/schema-error.hxx b/libxsde/xsde/cxx/schema-error.hxx
index f442bf6..a20deac 100644
--- a/libxsde/xsde/cxx/schema-error.hxx
+++ b/libxsde/xsde/cxx/schema-error.hxx
@@ -62,6 +62,7 @@ namespace xsde
value_greater_than_max,
value_less_than_min,
value_not_in_enumeration,
+ value_pattern_mismatch,
length_greater_than_max,
length_less_than_min,
length_not_equal_prescribed,
diff --git a/libxsde/xsde/cxx/serializer/validating/string-common.cxx b/libxsde/xsde/cxx/serializer/validating/string-common.cxx
index 5dab6c9..53958a4 100644
--- a/libxsde/xsde/cxx/serializer/validating/string-common.cxx
+++ b/libxsde/xsde/cxx/serializer/validating/string-common.cxx
@@ -3,7 +3,16 @@
// copyright : Copyright (c) 2005-2010 Code Synthesis Tools CC
// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
+#include <xsde/cxx/config.hxx>
+
#include <string.h> // strlen
+
+#ifdef XSDE_REGEXP
+#ifdef XSDE_EXCEPTIONS
+# include <new> // std::bad_alloc
+#endif
+#endif
+
#include <xsde/cxx/string-search.hxx>
#include <xsde/cxx/serializer/validating/string-common.hxx>
@@ -23,7 +32,11 @@ namespace xsde
if (f.length_set_ ||
f.min_length_set_ ||
f.max_length_set_ ||
- f.enum_count_ != 0)
+ f.enum_count_ != 0
+#ifdef XSDE_REGEXP
+ || f.pattern_set_ != 0
+#endif
+ )
{
return validate_facets (s, strlen (s), f, ctx);
}
@@ -64,6 +77,41 @@ namespace xsde
}
}
+#ifdef XSDE_REGEXP // Pattern facet check; compiled in only when XSDE_REGEXP is defined.
+ if (f.pattern_set_ != 0) // Non-zero means a pattern was set (1 - string, 2 - compiled).
+ {
+ if (f.pattern_set_ == 1) // Still holding the pattern string: compile it on first use.
+ {
+ xmlRegexpPtr r = xmlRegexpCompile (
+ reinterpret_cast<const xmlChar*> (f.pattern_.str));
+
+ if (r == 0) // Every compilation failure is reported as an out-of-memory condition.
+ {
+#ifdef XSDE_EXCEPTIONS
+ throw std::bad_alloc ();
+#else
+ ctx.sys_error (sys_error::no_memory);
+ return false;
+#endif
+
+ }
+
+ string_facets::facets& t =
+ const_cast<string_facets::facets&> (f); // Constness is cast away so the compiled form can be cached in f.
+
+ t.pattern_.regexp = r; // Replaces the str member of the pattern_ union.
+ t.pattern_set_ = 2; // 2 - compiled, so later calls skip the compilation step.
+ }
+
+ if (xmlRegexpExec (
+ f.pattern_.regexp,
+ reinterpret_cast<const xmlChar*> (s)) != 1) // Any result other than 1 is treated as a mismatch.
+ {
+ ctx.schema_error (schema_error::value_pattern_mismatch);
+ return false;
+ }
+ }
+#endif
return true;
}
}
diff --git a/libxsde/xsde/cxx/serializer/validating/xml-schema-sskel.hxx b/libxsde/xsde/cxx/serializer/validating/xml-schema-sskel.hxx
index a97742a..b93012d 100644
--- a/libxsde/xsde/cxx/serializer/validating/xml-schema-sskel.hxx
+++ b/libxsde/xsde/cxx/serializer/validating/xml-schema-sskel.hxx
@@ -12,6 +12,10 @@
# include <string>
#endif
+#ifdef XSDE_REGEXP
+# include <xsde/c/regexp/xmlregexp.h>
+#endif
+
#include <xsde/cxx/serializer/xml-schema.hxx>
#include <xsde/cxx/serializer/validating/serializer.hxx>
@@ -643,6 +647,9 @@ namespace xsde
struct string_facets
{
string_facets ();
+#ifdef XSDE_REGEXP
+ ~string_facets ();
+#endif
void
_length_facet (size_t);
@@ -656,6 +663,9 @@ namespace xsde
void
_enumeration_facet (const char* const*, size_t count);
+ void
+ _pattern_facet (const char*);
+
public:
struct facets
{
@@ -666,9 +676,24 @@ namespace xsde
const char* const* enum_;
size_t enum_count_;
+#ifdef XSDE_REGEXP
+ union
+ {
+ const char* str;
+ xmlRegexpPtr regexp;
+ } pattern_;
+#endif
unsigned int length_set_ : 1;
unsigned int min_length_set_ : 1;
unsigned int max_length_set_ : 1;
+
+#ifdef XSDE_REGEXP
+ // 0 - not set
+ // 1 - string
+ // 2 - compiled
+ //
+ unsigned int pattern_set_: 2;
+#endif
};
protected:
diff --git a/libxsde/xsde/cxx/serializer/validating/xml-schema-sskel.ixx b/libxsde/xsde/cxx/serializer/validating/xml-schema-sskel.ixx
index d62fdd4..35ce5d9 100644
--- a/libxsde/xsde/cxx/serializer/validating/xml-schema-sskel.ixx
+++ b/libxsde/xsde/cxx/serializer/validating/xml-schema-sskel.ixx
@@ -458,7 +458,20 @@ namespace xsde
facets_.enum_ = 0;
facets_.enum_count_ = 0;
+
+#ifdef XSDE_REGEXP
+ facets_.pattern_set_ = 0;
+#endif
+ }
+
+#ifdef XSDE_REGEXP // The destructor is defined only when XSDE_REGEXP is defined.
+ inline string_facets::
+ ~string_facets () // NOTE(review): no copy constructor/assignment is visible here; copying an object with a compiled pattern would free the regexp twice - confirm copies are not made.
+ {
+ if (facets_.pattern_set_ == 2) // 2 - compiled: the pattern_ union holds a regexp, not the pattern string.
+ xmlRegFreeRegexp (facets_.pattern_.regexp); // Release the compiled regexp.
}
+#endif
inline void string_facets::
_length_facet (size_t v)
@@ -488,6 +501,20 @@ namespace xsde
facets_.enum_count_ = count;
}
+#ifndef XSDE_REGEXP // Without XSDE_REGEXP the pattern facet is accepted and ignored.
+ inline void string_facets::
+ _pattern_facet (const char*)
+ {
+ }
+#else // With XSDE_REGEXP the pattern string is recorded in facets_.
+ inline void string_facets::
+ _pattern_facet (const char* s)
+ {
+ facets_.pattern_.str = s; // Only the pointer is stored; the string itself is not copied. NOTE(review): a regexp already held in this union (pattern_set_ == 2) would be overwritten without being freed - confirm the facet is set only once.
+ facets_.pattern_set_ = 1; // 1 - string (not yet compiled).
+ }
+#endif
+
// string_sskel
//
#ifdef XSDE_REUSE_STYLE_TIEIN
diff --git a/libxsde/xsde/makefile b/libxsde/xsde/makefile
index 51edf31..9787855 100644
--- a/libxsde/xsde/makefile
+++ b/libxsde/xsde/makefile
@@ -8,6 +8,12 @@ include $(dir $(lastword $(MAKEFILE_LIST)))../../build/bootstrap.make
c_tun := c/expat/xmlparse.c c/expat/xmlrole.c c/expat/xmltok.c
c_tun += c/genx/genx.c c/genx/char-props.c
+ifneq ($(xsde_parser_validation)$(xsde_serializer_validation),nn)
+ifeq ($(xsde_regexp),y)
+c_tun += c/regexp/chvalid.c c/regexp/xmlunicode.c c/regexp/xmlregexp.c
+endif
+endif
+
ifeq ($(xsde_custom_allocator),y)
ifeq ($(xsde_default_allocator),y)
c_tun += allocator.c
@@ -40,6 +46,11 @@ endif
ifneq ($(xsde_parser_validation)$(xsde_serializer_validation),nn)
cxx_tun += cxx/schema-error.cxx
+
+ifeq ($(xsde_regexp),y) # NOTE(review): the line below re-adds cxx/schema-error.cxx, which the enclosing block already adds; confirm whether another source was intended or this block is redundant.
+cxx_tun += cxx/schema-error.cxx
+endif
+
endif
ifeq ($(xsde_polymorphic),y)
@@ -590,6 +601,11 @@ ifeq ($(xsde_serializer_validation),y)
else
@echo '#undef XSDE_SERIALIZER_VALIDATION' >>$@
endif
+ifeq ($(xsde_regexp),y)
+ @echo '#define XSDE_REGEXP' >>$@
+else
+ @echo '#undef XSDE_REGEXP' >>$@
+endif
ifeq ($(xsde_reuse_style),mixin)
@echo '#define XSDE_REUSE_STYLE_MIXIN' >>$@
else
diff --git a/tests/cxx/hybrid/makefile b/tests/cxx/hybrid/makefile
index cef5ffc..af610f5 100644
--- a/tests/cxx/hybrid/makefile
+++ b/tests/cxx/hybrid/makefile
@@ -11,7 +11,7 @@ include $(dir $(lastword $(MAKEFILE_LIST)))../../../build/bootstrap.make
all_tests := sequences polymorphism iterator built-in default enumeration \
iso8859-1 list recursive test-template union binary/cdr binary/xdr choice \
-clone
+clone pattern
build_tests := sequences
@@ -26,7 +26,7 @@ endif
endif
ifeq ($(xsde_iostream),y)
-build_tests += built-in default enumeration list test-template union
+build_tests += built-in default enumeration list test-template union pattern
ifeq ($(xsde_encoding),iso8859-1)
build_tests += iso8859-1
diff --git a/tests/cxx/hybrid/pattern/driver.cxx b/tests/cxx/hybrid/pattern/driver.cxx
new file mode 100644
index 0000000..b9d8663
--- /dev/null
+++ b/tests/cxx/hybrid/pattern/driver.cxx
@@ -0,0 +1,56 @@
+// file : tests/cxx/hybrid/pattern/driver.cxx
+// author : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2006-2010 Code Synthesis Tools CC
+// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
+
+// Test the pattern facet validation.
+//
+
+#include <iostream>
+
+#include "test.hxx"
+#include "test-pimpl.hxx"
+#include "test-simpl.hxx"
+
+using namespace std;
+using namespace test;
+
+int
+main (int argc, char* argv[])
+{
+ if (argc != 2) // Exactly one argument (the XML file) is required.
+ {
+ cerr << "usage: " << argv[0] << " test.xml" << endl;
+ return 1;
+ }
+
+ // Parse the file named by argv[1]; post() hands back the result as a
+ // pointer (r) that is deleted at the end of main.
+ root_paggr root_p;
+
+ xml_schema::document_pimpl doc_p (
+ root_p.root_parser (),
+ root_p.root_namespace (),
+ root_p.root_name ());
+
+ root_p.pre ();
+ doc_p.parse (argv[1]);
+ type* r = root_p.post ();
+
+ // Serialize the parsed object to cout with the pretty_print flag,
+ // after registering the "t" prefix for the "test" namespace.
+ root_saggr root_s;
+
+ xml_schema::document_simpl doc_s (
+ root_s.root_serializer (),
+ root_s.root_namespace (),
+ root_s.root_name ());
+
+ doc_s.add_prefix ("t", "test");
+
+ root_s.pre (*r);
+ doc_s.serialize (cout, xml_schema::document_simpl::pretty_print);
+ root_s.post ();
+
+ delete r;
+}
diff --git a/tests/cxx/hybrid/pattern/makefile b/tests/cxx/hybrid/pattern/makefile
new file mode 100644
index 0000000..8313805
--- /dev/null
+++ b/tests/cxx/hybrid/pattern/makefile
@@ -0,0 +1,108 @@
+# file : tests/cxx/hybrid/pattern/makefile
+# author : Boris Kolpackov <boris@codesynthesis.com>
+# copyright : Copyright (c) 2006-2010 Code Synthesis Tools CC
+# license : GNU GPL v2 + exceptions; see accompanying LICENSE file
+
+include $(dir $(lastword $(MAKEFILE_LIST)))../../../../build/bootstrap.make
+
+xsd := test.xsd
+cxx := driver.cxx
+
+obj := $(addprefix $(out_base)/,\
+$(cxx:.cxx=.o) \
+$(xsd:.xsd=.o) \
+$(xsd:.xsd=-pskel.o) \
+$(xsd:.xsd=-pimpl.o) \
+$(xsd:.xsd=-sskel.o) \
+$(xsd:.xsd=-simpl.o))
+
+dep := $(obj:.o=.o.d)
+
+xsde.l := $(out_root)/libxsde/xsde/xsde.l
+xsde.l.cpp-options := $(out_root)/libxsde/xsde/xsde.l.cpp-options
+
+driver := $(out_base)/driver
+test := $(out_base)/.test
+dist := $(out_base)/.dist
+dist-win := $(out_base)/.dist-win
+clean := $(out_base)/.clean
+
+
+# Build.
+#
+$(driver): $(obj) $(xsde.l)
+
+$(obj) $(dep): $(xsde.l.cpp-options)
+
+genf := $(xsd:.xsd=.hxx) $(xsd:.xsd=.cxx) \
+ $(xsd:.xsd=-pskel.hxx) $(xsd:.xsd=-pskel.cxx) \
+ $(xsd:.xsd=-pimpl.hxx) $(xsd:.xsd=-pimpl.cxx) \
+ $(xsd:.xsd=-sskel.hxx) $(xsd:.xsd=-sskel.cxx) \
+ $(xsd:.xsd=-simpl.hxx) $(xsd:.xsd=-simpl.cxx)
+
+gen := $(addprefix $(out_base)/,$(genf))
+
+$(gen): $(out_root)/xsde/xsde
+$(gen): xsde := $(out_root)/xsde/xsde
+$(gen) $(dist) $(dist-win): xsde_options += --generate-parser \
+--generate-serializer --generate-aggregate
+
+$(call include-dep,$(dep))
+
+# Convenience alias for default target.
+#
+$(out_base)/: $(driver)
+
+
+# Test. The recipe pipes the output of `$$1 test-000.xml` into `diff -u test-000.std -`;
+# $$1 is presumably replaced with $(driver) by the message helper (confirm in the build system).
+$(test): driver := $(driver)
+$(test): $(driver) $(src_base)/test-000.xml $(src_base)/test-000.std
+ $(call message,test $$1,$$1 $(src_base)/test-000.xml | diff -u $(src_base)/test-000.std -,$(driver))
+
+
+# Dist.
+#
+$(dist) $(dist-win): opt := -src $(src_base) -cmd cxx-hybrid -xsd "$(xsd)" \
+-cxx "$(cxx)" -gen "$(genf)" -opt "$(xsde_options)" -out $(dist_prefix)
+
+$(dist):
+ $(call message,install $(src_base),$(scf_root)/dist $(opt))
+
+$(dist-win):
+ $(call message,install $(src_base),$(scf_root)/dist -win $(opt))
+
+
+# Clean.
+#
+$(clean): $(driver).o.clean \
+ $(addsuffix .cxx.clean,$(obj)) \
+ $(addsuffix .cxx.clean,$(dep)) \
+ $(addprefix $(out_base)/,$(xsd:.xsd=.cxx.xsd.clean))
+
+
+# Generated .gitignore.
+#
+ifeq ($(out_base),$(src_base))
+$(gen): | $(out_base)/.gitignore
+$(driver): | $(out_base)/.gitignore
+
+$(out_base)/.gitignore: files := driver $(genf)
+$(clean): $(out_base)/.gitignore.clean
+
+$(call include,$(bld_root)/git/gitignore.make)
+endif
+
+
+# How to.
+#
+$(call include,$(bld_root)/cxx/o-e.make)
+$(call include,$(bld_root)/cxx/cxx-o.make)
+$(call include,$(bld_root)/cxx/cxx-d.make)
+$(call include,$(scf_root)/xsde/hybrid/xsd-cxx.make)
+
+
+# Dependencies.
+#
+$(call import,$(src_root)/xsde/makefile)
+$(call import,$(src_root)/libxsde/xsde/makefile)
diff --git a/tests/cxx/hybrid/pattern/test-000.std b/tests/cxx/hybrid/pattern/test-000.std
new file mode 100644
index 0000000..7b0d83a
--- /dev/null
+++ b/tests/cxx/hybrid/pattern/test-000.std
@@ -0,0 +1,11 @@
+<t:root xmlns:t="test">
+ <t1>foobarbaz</t1>
+ <t2>12,32,123,321,</t2>
+ <t3>12321</t3>
+ <t4>12321</t4>
+ <t5>
+
+
+ </t5>
+ <t6>abcdef</t6>
+</t:root> \ No newline at end of file
diff --git a/tests/cxx/hybrid/pattern/test-000.xml b/tests/cxx/hybrid/pattern/test-000.xml
new file mode 100644
index 0000000..d503cb1
--- /dev/null
+++ b/tests/cxx/hybrid/pattern/test-000.xml
@@ -0,0 +1,13 @@
+<t:root xmlns:t="test">
+
+ <t1>foobarbaz</t1>
+ <t2>12,32,123,321,</t2>
+ <t3>12321</t3>
+ <t4>12321</t4>
+ <t5>
+
+
+ </t5>
+ <t6>abcdef</t6>
+
+</t:root>
diff --git a/tests/cxx/hybrid/pattern/test.xsd b/tests/cxx/hybrid/pattern/test.xsd
new file mode 100644
index 0000000..e008709
--- /dev/null
+++ b/tests/cxx/hybrid/pattern/test.xsd
@@ -0,0 +1,53 @@
+<?xml version="1.0"?>
+<schema xmlns="http://www.w3.org/2001/XMLSchema" xmlns:t="test" targetNamespace="test">
+
+ <simpleType name="t1">
+ <restriction base="string">
+ <pattern value="foo.*baz"/>
+ </restriction>
+ </simpleType>
+
+ <simpleType name="t2">
+ <restriction base="string">
+ <pattern value="([123]+,)+"/>
+ </restriction>
+ </simpleType>
+
+ <simpleType name="t3">
+ <restriction base="string">
+ <pattern value="[1-9]*"/>
+ </restriction>
+ </simpleType>
+
+ <simpleType name="t4">
+ <restriction base="string">
+ <pattern value="\d+"/>
+ </restriction>
+ </simpleType>
+
+ <simpleType name="t5">
+ <restriction base="string">
+ <pattern value="\s+"/>
+ </restriction>
+ </simpleType>
+
+ <simpleType name="t6">
+ <restriction base="string">
+ <pattern value="\w+"/>
+ </restriction>
+ </simpleType>
+
+ <complexType name="type">
+ <sequence>
+ <element name="t1" type="t:t1" maxOccurs="unbounded"/>
+ <element name="t2" type="t:t2" maxOccurs="unbounded"/>
+ <element name="t3" type="t:t3" maxOccurs="unbounded"/>
+ <element name="t4" type="t:t4" maxOccurs="unbounded"/>
+ <element name="t5" type="t:t5" maxOccurs="unbounded"/>
+ <element name="t6" type="t:t6" maxOccurs="unbounded"/>
+ </sequence>
+ </complexType>
+
+ <element name="root" type="t:type"/>
+
+</schema>
diff --git a/xsde/cxx/parser/elements.cxx b/xsde/cxx/parser/elements.cxx
index 2c138ce..25e2032 100644
--- a/xsde/cxx/parser/elements.cxx
+++ b/xsde/cxx/parser/elements.cxx
@@ -207,24 +207,30 @@ namespace CXX
SemanticGraph::Type& ub (ultimate_base (c));
Restricts::FacetIterator end (r.facet_end ());
- if ((ub.is_a<SemanticGraph::Fundamental::String> () ||
- ub.is_a<SemanticGraph::Fundamental::AnyURI> ()) &&
- r.facet_find (L"whiteSpace") != end)
- return true;
-
- if (validation)
+ if (ub.is_a<SemanticGraph::Fundamental::String> () ||
+ ub.is_a<SemanticGraph::Fundamental::AnyURI> ())
{
- if (ub.is_a<SemanticGraph::Fundamental::Short> () ||
- ub.is_a<SemanticGraph::Fundamental::UnsignedByte> () ||
- ub.is_a<SemanticGraph::Fundamental::UnsignedShort> () ||
- ub.is_a<SemanticGraph::Fundamental::UnsignedInt> () ||
- ub.is_a<SemanticGraph::Fundamental::String> () ||
- ub.is_a<SemanticGraph::Fundamental::AnyURI> ())
+ if (r.facet_find (L"whiteSpace") != end)
+ return true;
+
+ if (validation)
{
if (r.facet_find (L"length") != end ||
r.facet_find (L"minLength") != end ||
r.facet_find (L"maxLength") != end ||
- r.facet_find (L"minInclusive") != end ||
+ r.facet_find (L"pattern") != end)
+ return true;
+ }
+ }
+
+ if (ub.is_a<SemanticGraph::Fundamental::Short> () ||
+ ub.is_a<SemanticGraph::Fundamental::UnsignedByte> () ||
+ ub.is_a<SemanticGraph::Fundamental::UnsignedShort> () ||
+ ub.is_a<SemanticGraph::Fundamental::UnsignedInt> ())
+ {
+ if (validation)
+ {
+ if (r.facet_find (L"minInclusive") != end ||
r.facet_find (L"minExclusive") != end ||
r.facet_find (L"maxInclusive") != end ||
r.facet_find (L"maxExclusive") != end)
diff --git a/xsde/cxx/parser/parser-inline.cxx b/xsde/cxx/parser/parser-inline.cxx
index 984eca6..d20a3e7 100644
--- a/xsde/cxx/parser/parser-inline.cxx
+++ b/xsde/cxx/parser/parser-inline.cxx
@@ -17,54 +17,83 @@ namespace CXX
Void
facet_calls (SemanticGraph::Complex& c, Context& ctx)
{
+ using SemanticGraph::Restricts;
+
std::wostream& os (ctx.os);
- using SemanticGraph::Restricts;
+ SemanticGraph::Type& ub (ctx.ultimate_base (c));
Restricts& r (dynamic_cast<Restricts&> (c.inherits ()));
- for (Restricts::FacetIterator i (r.facet_begin ());
- i != r.facet_end (); ++i)
+ if (ub.is_a<SemanticGraph::Fundamental::String> () ||
+ ub.is_a<SemanticGraph::Fundamental::AnyURI> ())
{
- if (i->first == L"length")
- {
- os << "this->_length_facet (" << i->second << "UL);";
- }
- else if (i->first == L"minLength")
- {
- os << "this->_min_length_facet (" << i->second << "UL);";
- }
- else if (i->first == L"maxLength")
- {
- os << "this->_max_length_facet (" << i->second << "UL);";
- }
- else if (i->first == L"minInclusive")
- {
- os << "this->_min_facet (" << i->second << ", true);";
- }
- else if (i->first == L"minExclusive")
+ for (Restricts::FacetIterator i (r.facet_begin ());
+ i != r.facet_end (); ++i)
{
- os << "this->_min_facet (" << i->second << ", false);";
- }
- else if (i->first == L"maxInclusive")
- {
- os << "this->_max_facet (" << i->second << ", true);";
- }
- else if (i->first == L"maxExclusive")
- {
- os << "this->_max_facet (" << i->second << ", false);";
+ if (i->first == L"whiteSpace")
+ {
+ os << "this->_whitespace_facet (";
+
+ if (i->second == L"preserve")
+ os << "0";
+ else if (i->second == L"replace")
+ os << "1";
+ else if (i->second == L"collapse")
+ os << "2";
+
+ os << ");";
+ continue;
+ }
+
+ if (!ctx.validation)
+ continue;
+
+ if (i->first == L"length")
+ {
+ os << "this->_length_facet (" << i->second << "UL);";
+ }
+ else if (i->first == L"minLength")
+ {
+ os << "this->_min_length_facet (" << i->second << "UL);";
+ }
+ else if (i->first == L"maxLength")
+ {
+ os << "this->_max_length_facet (" << i->second << "UL);";
+ }
+ else if (i->first == L"pattern")
+ {
+ os << "this->_pattern_facet (" << ctx.strlit (i->second) << ");";
+ }
}
- else if (i->first == L"whiteSpace")
- {
- os << "this->_whitespace_facet (";
+ }
- if (i->second == L"preserve")
- os << "0";
- else if (i->second == L"replace")
- os << "1";
- else if (i->second == L"collapse")
- os << "2";
+ if (ub.is_a<SemanticGraph::Fundamental::Short> () ||
+ ub.is_a<SemanticGraph::Fundamental::UnsignedByte> () ||
+ ub.is_a<SemanticGraph::Fundamental::UnsignedShort> () ||
+ ub.is_a<SemanticGraph::Fundamental::UnsignedInt> ())
+ {
+ for (Restricts::FacetIterator i (r.facet_begin ());
+ i != r.facet_end (); ++i)
+ {
+ if (!ctx.validation)
+ continue;
- os << ");";
+ if (i->first == L"minInclusive")
+ {
+ os << "this->_min_facet (" << i->second << ", true);";
+ }
+ else if (i->first == L"minExclusive")
+ {
+ os << "this->_min_facet (" << i->second << ", false);";
+ }
+ else if (i->first == L"maxInclusive")
+ {
+ os << "this->_max_facet (" << i->second << ", true);";
+ }
+ else if (i->first == L"maxExclusive")
+ {
+ os << "this->_max_facet (" << i->second << ", false);";
+ }
}
}
}
diff --git a/xsde/cxx/serializer/elements.cxx b/xsde/cxx/serializer/elements.cxx
index 62fd32d..e598f8f 100644
--- a/xsde/cxx/serializer/elements.cxx
+++ b/xsde/cxx/serializer/elements.cxx
@@ -225,24 +225,34 @@ namespace CXX
return false;
SemanticGraph::Type& ub (ultimate_base (c));
+ Restricts::FacetIterator end (r.facet_end ());
+
+ if (ub.is_a<SemanticGraph::Fundamental::String> () ||
+ ub.is_a<SemanticGraph::Fundamental::AnyURI> ())
+ {
+ if (validation)
+ {
+ if (r.facet_find (L"length") != end ||
+ r.facet_find (L"minLength") != end ||
+ r.facet_find (L"maxLength") != end ||
+ r.facet_find (L"pattern") != end)
+ return true;
+ }
+ }
if (ub.is_a<SemanticGraph::Fundamental::Short> () ||
ub.is_a<SemanticGraph::Fundamental::UnsignedByte> () ||
ub.is_a<SemanticGraph::Fundamental::UnsignedShort> () ||
- ub.is_a<SemanticGraph::Fundamental::UnsignedInt> () ||
- ub.is_a<SemanticGraph::Fundamental::String> () ||
- ub.is_a<SemanticGraph::Fundamental::AnyURI> ())
+ ub.is_a<SemanticGraph::Fundamental::UnsignedInt> ())
{
- Restricts::FacetIterator end (r.facet_end ());
-
- if (r.facet_find (L"length") != end ||
- r.facet_find (L"minLength") != end ||
- r.facet_find (L"maxLength") != end ||
- r.facet_find (L"minInclusive") != end ||
- r.facet_find (L"minExclusive") != end ||
- r.facet_find (L"maxInclusive") != end ||
- r.facet_find (L"maxExclusive") != end)
- return true;
+ if (validation)
+ {
+ if (r.facet_find (L"minInclusive") != end ||
+ r.facet_find (L"minExclusive") != end ||
+ r.facet_find (L"maxInclusive") != end ||
+ r.facet_find (L"maxExclusive") != end)
+ return true;
+ }
}
}
diff --git a/xsde/cxx/serializer/serializer-inline.cxx b/xsde/cxx/serializer/serializer-inline.cxx
index f888e4f..4bebc9c 100644
--- a/xsde/cxx/serializer/serializer-inline.cxx
+++ b/xsde/cxx/serializer/serializer-inline.cxx
@@ -17,41 +17,68 @@ namespace CXX
Void
facet_calls (SemanticGraph::Complex& c, Context& ctx)
{
+ using SemanticGraph::Restricts;
+
std::wostream& os (ctx.os);
- using SemanticGraph::Restricts;
+ SemanticGraph::Type& ub (ctx.ultimate_base (c));
Restricts& r (dynamic_cast<Restricts&> (c.inherits ()));
- for (Restricts::FacetIterator i (r.facet_begin ());
- i != r.facet_end (); ++i)
+ if (ub.is_a<SemanticGraph::Fundamental::String> () ||
+ ub.is_a<SemanticGraph::Fundamental::AnyURI> ())
{
- if (i->first == L"length")
- {
- os << "this->_length_facet (" << i->second << "UL);";
- }
- else if (i->first == L"minLength")
- {
- os << "this->_min_length_facet (" << i->second << "UL);";
- }
- else if (i->first == L"maxLength")
- {
- os << "this->_max_length_facet (" << i->second << "UL);";
- }
- else if (i->first == L"minInclusive")
+ for (Restricts::FacetIterator i (r.facet_begin ());
+ i != r.facet_end (); ++i)
{
- os << "this->_min_facet (" << i->second << ", true);";
- }
- else if (i->first == L"minExclusive")
- {
- os << "this->_min_facet (" << i->second << ", false);";
- }
- else if (i->first == L"maxInclusive")
- {
- os << "this->_max_facet (" << i->second << ", true);";
+ if (!ctx.validation)
+ continue;
+
+ if (i->first == L"length")
+ {
+ os << "this->_length_facet (" << i->second << "UL);";
+ }
+ else if (i->first == L"minLength")
+ {
+ os << "this->_min_length_facet (" << i->second << "UL);";
+ }
+ else if (i->first == L"maxLength")
+ {
+ os << "this->_max_length_facet (" << i->second << "UL);";
+ }
+ else if (i->first == L"pattern")
+ {
+ os << "this->_pattern_facet (" << ctx.strlit (i->second) << ");";
+ }
}
- else if (i->first == L"maxExclusive")
+ }
+
+ if (ub.is_a<SemanticGraph::Fundamental::Short> () ||
+ ub.is_a<SemanticGraph::Fundamental::UnsignedByte> () ||
+ ub.is_a<SemanticGraph::Fundamental::UnsignedShort> () ||
+ ub.is_a<SemanticGraph::Fundamental::UnsignedInt> ())
+ {
+ for (Restricts::FacetIterator i (r.facet_begin ());
+ i != r.facet_end (); ++i)
{
- os << "this->_max_facet (" << i->second << ", false);";
+ if (!ctx.validation)
+ continue;
+
+ if (i->first == L"minInclusive")
+ {
+ os << "this->_min_facet (" << i->second << ", true);";
+ }
+ else if (i->first == L"minExclusive")
+ {
+ os << "this->_min_facet (" << i->second << ", false);";
+ }
+ else if (i->first == L"maxInclusive")
+ {
+ os << "this->_max_facet (" << i->second << ", true);";
+ }
+ else if (i->first == L"maxExclusive")
+ {
+ os << "this->_max_facet (" << i->second << ", false);";
+ }
}
}
}