aboutsummaryrefslogtreecommitdiff
path: root/libstudxml/details/expat/xmltok.c
diff options
context:
space:
mode:
Diffstat (limited to 'libstudxml/details/expat/xmltok.c')
-rw-r--r--libstudxml/details/expat/xmltok.c117
1 files changed, 86 insertions, 31 deletions
diff --git a/libstudxml/details/expat/xmltok.c b/libstudxml/details/expat/xmltok.c
index 5a8f36f..a36735c 100644
--- a/libstudxml/details/expat/xmltok.c
+++ b/libstudxml/details/expat/xmltok.c
@@ -336,11 +336,12 @@ enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */
UTF8_cval4 = 0xf0
};
-static void PTRCALL
+static enum XML_Convert_Result PTRCALL
utf8_toUtf8(const ENCODING *enc,
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
{
+ enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
char *to;
const char *from;
@@ -348,30 +349,45 @@ utf8_toUtf8(const ENCODING *enc,
if (fromLim - *fromP > toLim - *toP) {
/* Avoid copying partial characters. */
+ res = XML_CONVERT_OUTPUT_EXHAUSTED;
for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--)
if (((unsigned char)fromLim[-1] & 0xc0) != 0x80)
break;
}
- for (to = *toP, from = *fromP; from != fromLim; from++, to++)
+ for (to = *toP, from = *fromP; (from < fromLim) && (to < toLim); from++, to++)
*to = *from;
*fromP = from;
*toP = to;
+
+ if ((to == toLim) && (from < fromLim))
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
+ else
+ return res;
}
-static void PTRCALL
+static enum XML_Convert_Result PTRCALL
utf8_toUtf16(const ENCODING *enc,
const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim)
{
+ enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
unsigned short *to = *toP;
const char *from = *fromP;
- while (from != fromLim && to != toLim) {
+ while (from < fromLim && to < toLim) {
switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
case BT_LEAD2:
+ if (fromLim - from < 2) {
+ res = XML_CONVERT_INPUT_INCOMPLETE;
+ break;
+ }
*to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
from += 2;
break;
case BT_LEAD3:
+ if (fromLim - from < 3) {
+ res = XML_CONVERT_INPUT_INCOMPLETE;
+ break;
+ }
*to++ = (unsigned short)(((from[0] & 0xf) << 12)
| ((from[1] & 0x3f) << 6) | (from[2] & 0x3f));
from += 3;
@@ -379,8 +395,14 @@ utf8_toUtf16(const ENCODING *enc,
case BT_LEAD4:
{
unsigned long n;
- if (to + 1 == toLim)
+ if (toLim - to < 2) {
+ res = XML_CONVERT_OUTPUT_EXHAUSTED;
goto after;
+ }
+ if (fromLim - from < 4) {
+ res = XML_CONVERT_INPUT_INCOMPLETE;
+ goto after;
+ }
n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
| ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
n -= 0x10000;
@@ -398,6 +420,7 @@ utf8_toUtf16(const ENCODING *enc,
after:
*fromP = from;
*toP = to;
+ return res;
}
#ifdef XML_NS
@@ -446,7 +469,7 @@ static const struct normal_encoding internal_utf8_encoding = {
STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
};
-static void PTRCALL
+static enum XML_Convert_Result PTRCALL
latin1_toUtf8(const ENCODING *enc,
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
@@ -456,32 +479,37 @@ latin1_toUtf8(const ENCODING *enc,
for (;;) {
unsigned char c;
if (*fromP == fromLim)
- break;
+ return XML_CONVERT_COMPLETED;
c = (unsigned char)**fromP;
if (c & 0x80) {
if (toLim - *toP < 2)
- break;
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
*(*toP)++ = (char)((c >> 6) | UTF8_cval2);
*(*toP)++ = (char)((c & 0x3f) | 0x80);
(*fromP)++;
}
else {
if (*toP == toLim)
- break;
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
*(*toP)++ = *(*fromP)++;
}
}
}
-static void PTRCALL
+static enum XML_Convert_Result PTRCALL
latin1_toUtf16(const ENCODING *enc,
const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim)
{
UNUSED(enc);
- while (*fromP != fromLim && *toP != toLim)
+ while (*fromP < fromLim && *toP < toLim)
*(*toP)++ = (unsigned char)*(*fromP)++;
+
+ if ((*toP == toLim) && (*fromP < fromLim))
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
+ else
+ return XML_CONVERT_COMPLETED;
}
#ifdef XML_NS
@@ -508,15 +536,20 @@ static const struct normal_encoding latin1_encoding = {
STANDARD_VTABLE(sb_) ZERO_VTABLE
};
-static void PTRCALL
+static enum XML_Convert_Result PTRCALL
ascii_toUtf8(const ENCODING *enc,
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
{
UNUSED(enc);
- while (*fromP != fromLim && *toP != toLim)
+ while (*fromP < fromLim && *toP < toLim)
*(*toP)++ = *(*fromP)++;
+
+ if ((*toP == toLim) && (*fromP < fromLim))
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
+ else
+ return XML_CONVERT_COMPLETED;
}
#ifdef XML_NS
@@ -563,14 +596,15 @@ unicode_byte_type(char hi, char lo)
}
#define DEFINE_UTF16_TO_UTF8(E) \
-static void PTRCALL \
+static enum XML_Convert_Result PTRCALL \
E ## toUtf8(const ENCODING *enc, \
const char **fromP, const char *fromLim, \
char **toP, const char *toLim) \
{ \
- const char *from; \
+ const char *from = *fromP; \
UNUSED(enc); \
- for (from = *fromP; from != fromLim; from += 2) { \
+ fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \
+ for (; from < fromLim; from += 2) { \
int plane; \
unsigned char lo2; \
unsigned char lo = GET_LO(from); \
@@ -580,7 +614,7 @@ E ## toUtf8(const ENCODING *enc, \
if (lo < 0x80) { \
if (*toP == toLim) { \
*fromP = from; \
- return; \
+ return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \
*(*toP)++ = lo; \
break; \
@@ -590,7 +624,7 @@ E ## toUtf8(const ENCODING *enc, \
case 0x4: case 0x5: case 0x6: case 0x7: \
if (toLim - *toP < 2) { \
*fromP = from; \
- return; \
+ return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \
*(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \
*(*toP)++ = ((lo & 0x3f) | 0x80); \
@@ -598,7 +632,7 @@ E ## toUtf8(const ENCODING *enc, \
default: \
if (toLim - *toP < 3) { \
*fromP = from; \
- return; \
+ return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \
/* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
*(*toP)++ = ((hi >> 4) | UTF8_cval3); \
@@ -608,7 +642,11 @@ E ## toUtf8(const ENCODING *enc, \
case 0xD8: case 0xD9: case 0xDA: case 0xDB: \
if (toLim - *toP < 4) { \
*fromP = from; \
- return; \
+ return XML_CONVERT_OUTPUT_EXHAUSTED; \
+ } \
+ if (fromLim - from < 4) { \
+ *fromP = from; \
+ return XML_CONVERT_INPUT_INCOMPLETE; \
} \
plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
*(*toP)++ = ((plane >> 2) | UTF8_cval4); \
@@ -624,21 +662,33 @@ E ## toUtf8(const ENCODING *enc, \
} \
} \
*fromP = from; \
+ if (from < fromLim) \
+ return XML_CONVERT_INPUT_INCOMPLETE; \
+ else \
+ return XML_CONVERT_COMPLETED; \
}
#define DEFINE_UTF16_TO_UTF16(E) \
-static void PTRCALL \
+static enum XML_Convert_Result PTRCALL \
E ## toUtf16(const ENCODING *enc, \
const char **fromP, const char *fromLim, \
unsigned short **toP, const unsigned short *toLim) \
{ \
- UNUSED(enc); \
+ enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \
+ UNUSED(enc); \
+ fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */ \
/* Avoid copying first half only of surrogate */ \
if (fromLim - *fromP > ((toLim - *toP) << 1) \
- && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \
+ && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \
fromLim -= 2; \
- for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \
+ res = XML_CONVERT_INPUT_INCOMPLETE; \
+ } \
+ for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \
*(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
+ if ((*toP == toLim) && (*fromP < fromLim)) \
+ return XML_CONVERT_OUTPUT_EXHAUSTED; \
+ else \
+ return res; \
}
#define SET2(ptr, ch) \
@@ -1318,7 +1368,7 @@ unknown_isInvalid(const ENCODING *enc, const char *p)
return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
}
-static void PTRCALL
+static enum XML_Convert_Result PTRCALL
unknown_toUtf8(const ENCODING *enc,
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
@@ -1329,21 +1379,21 @@ unknown_toUtf8(const ENCODING *enc,
const char *utf8;
int n;
if (*fromP == fromLim)
- break;
+ return XML_CONVERT_COMPLETED;
utf8 = uenc->utf8[(unsigned char)**fromP];
n = *utf8++;
if (n == 0) {
int c = uenc->convert(uenc->userData, *fromP);
n = XmlUtf8Encode(c, buf);
if (n > toLim - *toP)
- break;
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
utf8 = buf;
*fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
- (BT_LEAD2 - 2));
}
else {
if (n > toLim - *toP)
- break;
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
(*fromP)++;
}
do {
@@ -1352,13 +1402,13 @@ unknown_toUtf8(const ENCODING *enc,
}
}
-static void PTRCALL
+static enum XML_Convert_Result PTRCALL
unknown_toUtf16(const ENCODING *enc,
const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim)
{
const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
- while (*fromP != fromLim && *toP != toLim) {
+ while (*fromP < fromLim && *toP < toLim) {
unsigned short c = uenc->utf16[(unsigned char)**fromP];
if (c == 0) {
c = (unsigned short)
@@ -1370,6 +1420,11 @@ unknown_toUtf16(const ENCODING *enc,
(*fromP)++;
*(*toP)++ = c;
}
+
+ if ((*toP == toLim) && (*fromP < fromLim))
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
+ else
+ return XML_CONVERT_COMPLETED;
}
ENCODING *
@@ -1533,7 +1588,7 @@ initScan(const ENCODING * const *encodingTable,
{
const ENCODING **encPtr;
- if (ptr == end)
+ if (ptr >= end)
return XML_TOK_NONE;
encPtr = enc->encPtr;
if (ptr + 1 == end) {