From c2fdf95d6870ce661ecc97f955c98d5323d28315 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Fri, 8 Mar 2013 15:30:12 +0200 Subject: Add support for writing non-canonicalized XML Specifically, don't sort attributes and close tags immediately. Make this mode the default. --- genx.c | 221 +++++++++++++++++++++++++++++++++++++++++++++++++---------------- genx.h | 7 +++ 2 files changed, 173 insertions(+), 55 deletions(-) diff --git a/genx.c b/genx.c index 91b0e50..8a1e8e3 100644 --- a/genx.c +++ b/genx.c @@ -99,6 +99,7 @@ struct genxAttribute_rec collector value; int provided; /* provided for current element? */ attrType atype; + genxAttribute next; /* Attribute order chain if not canonical. */ }; /******************************* @@ -122,7 +123,7 @@ struct genxWriter_rec plist attributes; plist prefixes; plist stack; - struct genxAttribute_rec arec; + struct genxAttribute_rec arec; /* Dummy attribute used for lookup. */ char * etext[100]; genxAlloc alloc; genxDealloc dealloc; @@ -131,6 +132,13 @@ struct genxWriter_rec int ppIndent; int ppDepth; Boolean ppSimple; + + /* Canonicalization. */ + Boolean canonical; + + /* Attrbute order when not canonical. */ + genxAttribute firstAttribute; + genxAttribute lastAttribute; }; /******************************* @@ -605,7 +613,10 @@ genxWriter genxNew(genxAlloc alloc, genxDealloc dealloc, void * userData) xml->declaration = xml->defaultDecl; w->ppIndent = 0; /* Pretty-printing is disabled by default. */ + w->canonical = False; /* No canonicalization by default. */ + w->firstAttribute = NULL; + w->lastAttribute = NULL; return w; } @@ -626,6 +637,23 @@ genxStatus genxReset (genxWriter w) ((genxNamespace) w->namespaces.pointers[i])->baroque = False; } + /* Clear provided attributes. */ + for (i = 0; i < w->attributes.count; i++) + ((genxAttribute) w->attributes.pointers[i])->provided = False; + + /* Clear attribute list. 
*/ + if (!w->canonical) + { + while (w->firstAttribute != NULL) + { + genxAttribute t = w->firstAttribute->next; + w->firstAttribute->next = NULL; + w->firstAttribute = t; + } + + w->lastAttribute = NULL; + } + w->status = GENX_SUCCESS; w->sequence = SEQUENCE_NO_DOC; @@ -664,6 +692,24 @@ int genxGetPrettyPrint(genxWriter w) } /* + * get/set canonicalization. + */ +genxStatus genxSetCanonical(genxWriter w, int flag) +{ + if (w->sequence == SEQUENCE_NO_DOC) + w->canonical = flag; + else + w->status = GENX_SEQUENCE_ERROR; + + return w->status; +} + +int genxGetCanonical(genxWriter w) +{ + return w->canonical; +} + +/* * get/set allocator */ void genxSetAlloc(genxWriter w, genxAlloc alloc) @@ -1089,6 +1135,7 @@ static genxAttribute declareAttribute(genxWriter w, genxNamespace ns, a->ns = ns; a->provided = False; a->atype = w->arec.atype; + a->next = NULL; if ((a->name = copy(w, name)) == NULL) { @@ -1150,7 +1197,7 @@ static genxStatus sendxBounded(genxWriter w, constUtf8 start, constUtf8 end) return GENX_IO_ERROR; } -#define SendCheck(w,s) if ((w->status=sendx(w,(utf8)s))!=GENX_SUCCESS) return w->status; +#define SendCheck(w,s) if ((w->status=sendx(w,(constUtf8)s))!=GENX_SUCCESS) return w->status /******************************* * XML writing routines. The semantics of the externally-facing ones are @@ -1191,6 +1238,32 @@ static genxStatus writeIndentation(genxWriter w) } /* + * Output attribute. + */ +static genxStatus writeAttribute(genxAttribute a) +{ + genxWriter w = a->writer; + + if (a->ns && a->ns->baroque && a->ns->declaration == w->xmlnsEquals) + return w->status = GENX_ATTRIBUTE_IN_DEFAULT_NAMESPACE; + + SendCheck(w, " "); + + if (a->ns) + { + SendCheck(w, a->ns->declaration->name + STRLEN_XMLNS_COLON); + SendCheck(w, ":"); + } + + SendCheck(w, a->name); + SendCheck(w, "=\""); + SendCheck(w, a->value.buf); + SendCheck(w, "\""); + + return w->status; +} + +/* * Write out the attributes we've been gathering up for an element. 
We save * them until we've gathered them all so they can be writen in canonical * order. @@ -1199,7 +1272,7 @@ static genxStatus writeIndentation(genxWriter w) * we build it, then as each attribute is added, we fill in its value and * mark the fact that it's been added, in the "provided" field. */ -static genxStatus writeStartTag(genxWriter w) +static genxStatus writeStartTag(genxWriter w, Boolean close) { int i; genxAttribute * aa = (genxAttribute *) w->attributes.pointers; @@ -1233,27 +1306,42 @@ static genxStatus writeStartTag(genxWriter w) } SendCheck(w, e->type); - for (i = 0; i < w->attributes.count; i++) + /* If we are canonicalizing, then write sorted attributes. Otherwise + write them in the order specified. */ + if (w->canonical) { - if (aa[i]->provided) + for (i = 0; i < w->attributes.count; i++) { - if (aa[i]->ns && aa[i]->ns->baroque && - aa[i]->ns->declaration == w->xmlnsEquals) - return w->status = GENX_ATTRIBUTE_IN_DEFAULT_NAMESPACE; - - SendCheck(w, " "); - - if (aa[i]->ns) + if (aa[i]->provided) { - SendCheck(w, aa[i]->ns->declaration->name + STRLEN_XMLNS_COLON) - SendCheck(w, ":"); + if (writeAttribute (aa[i]) != GENX_SUCCESS) + return w->status; + + aa[i]->provided = False; } - SendCheck(w, aa[i]->name); - SendCheck(w, "=\""); - SendCheck(w, aa[i]->value.buf); - SendCheck(w, "\""); } } + else + { + /* Keep the chain consistent even if we bail out mid way because of + an error. 
This way we will still be able to clear it in reset().*/ + while (w->firstAttribute != NULL) + { + genxAttribute t = w->firstAttribute->next; + + if (writeAttribute (w->firstAttribute) != GENX_SUCCESS) + return w->status; + + w->firstAttribute->provided = False; + w->firstAttribute->next = NULL; + w->firstAttribute = t; + } + + w->lastAttribute = NULL; + } + + if (close) + SendCheck(w, "/"); SendCheck(w, ">"); return GENX_SUCCESS; } @@ -1325,7 +1413,6 @@ genxStatus genxUnsetDefaultNamespace(genxWriter w) genxStatus genxStartElement(genxElement e) { genxWriter w = e->writer; - int i; switch (w->sequence) { @@ -1335,7 +1422,7 @@ genxStatus genxStartElement(genxElement e) return w->status = GENX_SEQUENCE_ERROR; case SEQUENCE_START_TAG: case SEQUENCE_ATTRIBUTES: - if ((w->status = writeStartTag(w)) != GENX_SUCCESS) + if ((w->status = writeStartTag(w, False)) != GENX_SUCCESS) return w->status; break; case SEQUENCE_PRE_DOC: @@ -1345,10 +1432,6 @@ genxStatus genxStartElement(genxElement e) w->sequence = SEQUENCE_START_TAG; - /* clear provided attributes */ - for (i = 0; i < w->attributes.count; i++) - ((genxAttribute) w->attributes.pointers[i])->provided = 0; - /* * push the stack. We push a NULL after a pointer to this element * because the stack will also contain pointers to the namespace @@ -1575,7 +1658,17 @@ static genxStatus addAttribute(genxAttribute a, constUtf8 valuestr) if (valuestr && a->provided) return w->status = GENX_DUPLICATE_ATTRIBUTE; - a->provided = 1; + + a->provided = True; + + /* Add the attribute to the ordered list if not canonical. */ + if (!w->canonical) + { + if (w->lastAttribute != NULL) + w->lastAttribute = w->lastAttribute->next = a; + else + w->lastAttribute = w->firstAttribute = a; + } return GENX_SUCCESS; } @@ -1632,15 +1725,24 @@ genxStatus genxEndAttribute(genxWriter w) if (a->provided) return w->status = GENX_DUPLICATE_ATTRIBUTE; - a->provided = 1; + a->provided = True; + + /* Add the attribute to the ordered list if not canonical. 
*/ + if (!w->canonical) + { + if (w->lastAttribute != NULL) + w->lastAttribute = w->lastAttribute->next = a; + else + w->lastAttribute = w->firstAttribute = a; + } return GENX_SUCCESS; } genxStatus genxEndElement(genxWriter w) { - genxElement e; int i; + Boolean close = True; switch (w->sequence) { @@ -1651,42 +1753,51 @@ genxStatus genxEndElement(genxWriter w) return w->status = GENX_SEQUENCE_ERROR; case SEQUENCE_START_TAG: case SEQUENCE_ATTRIBUTES: - if ((w->status = writeStartTag(w)) != GENX_SUCCESS) + if ((w->status = writeStartTag(w, !w->canonical)) != GENX_SUCCESS) return w->status; + close = w->canonical; break; case SEQUENCE_CONTENT: break; } /* - * first peek into the stack to find the right namespace declaration - * (if any) so we can properly prefix the end-tag. Have to do this - * before unwinding the stack because that might reset some xmlns - * prefixes to the context in the parent element + * Output the closing tag. */ - for (i = w->stack.count - 1; w->stack.pointers[i] != NULL; i -= 2) - ; - e = (genxElement) w->stack.pointers[--i]; - - if (w->ppIndent) + if (close) { - w->ppDepth--; + genxElement e; - if (!w->ppSimple) - if (writeIndentation (w) != GENX_SUCCESS) - return w->status; + /* + * first peek into the stack to find the right namespace declaration + * (if any) so we can properly prefix the end-tag. 
Have to do this + * before unwinding the stack because that might reset some xmlns + * prefixes to the context in the parent element + */ + for (i = w->stack.count - 1; w->stack.pointers[i] != NULL; i -= 2) + ; + e = (genxElement) w->stack.pointers[--i]; - w->ppSimple = False; - } + if (w->ppIndent) + { + w->ppDepth--; - SendCheck(w, "</"); - if (e->ns && e->ns->declaration != w->xmlnsEquals) - { - SendCheck(w, e->ns->declaration->name + STRLEN_XMLNS_COLON); - SendCheck(w, ":"); + if (!w->ppSimple) + if (writeIndentation (w) != GENX_SUCCESS) + return w->status; + + w->ppSimple = False; + } + + SendCheck(w, "</"); + if (e->ns && e->ns->declaration != w->xmlnsEquals) + { + SendCheck(w, e->ns->declaration->name + STRLEN_XMLNS_COLON); + SendCheck(w, ":"); + } + SendCheck(w, e->type); + SendCheck(w, ">"); } - SendCheck(w, e->type); - SendCheck(w, ">"); /* * pop zero or more namespace declarations, then a null, then the @@ -1807,7 +1918,7 @@ genxStatus genxAddText(genxWriter w, constUtf8 start) if (w->sequence == SEQUENCE_START_TAG || w->sequence == SEQUENCE_ATTRIBUTES) { - if ((w->status = writeStartTag(w)) != GENX_SUCCESS) + if ((w->status = writeStartTag(w, False)) != GENX_SUCCESS) return w->status; w->sequence = SEQUENCE_CONTENT; } @@ -1840,7 +1951,7 @@ genxStatus genxAddBoundedText(genxWriter w, constUtf8 start, constUtf8 end) if (w->sequence == SEQUENCE_START_TAG || w->sequence == SEQUENCE_ATTRIBUTES) { - if ((w->status = writeStartTag(w)) != GENX_SUCCESS) + if ((w->status = writeStartTag(w, False)) != GENX_SUCCESS) return w->status; w->sequence = SEQUENCE_CONTENT; } @@ -1880,7 +1991,7 @@ genxStatus genxAddCharacter(genxWriter w, int c) if (w->sequence == SEQUENCE_START_TAG || w->sequence == SEQUENCE_ATTRIBUTES) { - if ((w->status = writeStartTag(w)) != GENX_SUCCESS) + if ((w->status = writeStartTag(w, False)) != GENX_SUCCESS) return w->status; w->sequence = SEQUENCE_CONTENT; } @@ -2005,7 +2116,7 @@ genxStatus genxComment(genxWriter w, constUtf8 text) if (w->sequence == SEQUENCE_START_TAG || 
w->sequence == SEQUENCE_ATTRIBUTES) { - if ((w->status = writeStartTag(w)) != GENX_SUCCESS) + if ((w->status = writeStartTag(w, False)) != GENX_SUCCESS) return w->status; w->sequence = SEQUENCE_CONTENT; } @@ -2058,7 +2169,7 @@ genxStatus genxPI(genxWriter w, constUtf8 target, constUtf8 text) if (w->sequence == SEQUENCE_START_TAG || w->sequence == SEQUENCE_ATTRIBUTES) { - if ((w->status = writeStartTag(w)) != GENX_SUCCESS) + if ((w->status = writeStartTag(w, False)) != GENX_SUCCESS) return w->status; w->sequence = SEQUENCE_CONTENT; } diff --git a/genx.h b/genx.h index 6369990..0d4fd48 100644 --- a/genx.h +++ b/genx.h @@ -116,6 +116,13 @@ genxStatus genxSetPrettyPrint(genxWriter w, int indentation); int genxGetPrettyPrint(genxWriter w); /* + * Set/get canonicalization. If true, then output explicit closing + * tags and sort attributes. Default is false. + */ +genxStatus genxSetCanonical(genxWriter w, int flag); +int genxGetCanonical(genxWriter w); + +/* * User-provided memory allocator, if desired. For example, if you were * in an Apache module, you could arrange for genx to use ap_palloc by * making the pool accessible via the userData call. -- cgit v1.1