-static xmlDocPtr xml_parse(text *data, bool is_document, bool preserve_whitespace);
+static int parse_xml_decl(const xmlChar *str, size_t *lenp, xmlChar **version, xmlChar **encoding, int *standalone);
+static xmlDocPtr xml_parse(text *data, bool is_document, bool preserve_whitespace, xmlChar *encoding);
* that ERROR occurred if parsing failed.
+ * We try to avoid generating an XML declaration if possible.
+ * version specified in SQL:2003.
+ if ((res_code = parse_xml_decl((xmlChar *) str, &len, &version, &encoding, &standalone)) == 0)
+ appendStringInfoString(&buf, "
+ if (version)
+ appendStringInfo(&buf, " version=\"%s\"", version);
+ else
+ appendStringInfo(&buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
+ if (target_encoding && target_encoding != PG_UTF8)
+ /* XXX might be useful to convert this to IANA names
+ * (ISO-8859-1 instead of LATIN1 etc.); needs field
+ * experience */
+ appendStringInfo(&buf, " encoding=\"%s\"", pg_encoding_to_char(target_encoding));
+ if (standalone == 1)
+ appendStringInfoString(&buf, " standalone=\"yes\"");
+ else if (standalone == 0)
+ appendStringInfoString(&buf, " standalone=\"no\"");
+ appendStringInfoString(&buf, "?>");
+ }
+ else
+ {
+ /*
+ * If we are not going to produce an XML declaration, eat
+ * a single newline in the original string to prevent
+ * empty first lines in the output.
+ */
+ if (*(str + len) == '\n')
+ len += 1;
+ }
+ appendStringInfoString(&buf, str + len);
+
+ return buf.data;
+ }
+
+ xml_ereport_by_code(WARNING, ERRCODE_INTERNAL_ERROR,
+ "could not parse XML declaration in stored value", res_code);
+#endif
+ return str;
+}
+
+
Datum
xml_out(PG_FUNCTION_ARGS)
{
- xmltype *s = PG_GETARG_XML_P(0);
- char *result;
- int32 len;
-
- len = VARSIZE(s) - VARHDRSZ;
- result = palloc(len + 1);
- memcpy(result, VARDATA(s), len);
- result[len] = '\0';
+ xmltype *x = PG_GETARG_XML_P(0);
- PG_RETURN_CSTRING(result);
+ /*
+ * Return the C-string form of the xml argument, as produced by
+ * xml_out_internal() when called with 0 as its second argument.
+ *
+ * xml_out removes the encoding property in all cases. This is
+ * because we cannot control from here whether the datum will be
+ * converted to a different client encoding, so we'd do more harm
+ * than good by including it.
+ */
+ PG_RETURN_CSTRING(xml_out_internal(x, 0));
}
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
xmltype *result;
char *str;
+ char *newstr;
int nbytes;
xmlDocPtr doc;
+ xmlChar *encoding = NULL;
str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
- result = (xmltype *) palloc(nbytes + VARHDRSZ);
+ result = palloc(nbytes + VARHDRSZ);
VARATT_SIZEP(result) = nbytes + VARHDRSZ;
memcpy(VARDATA(result), str, nbytes);
- pfree(str);
+
+ /*
+ * Only the encoding named in the XML declaration is wanted here, so
+ * every other output argument is passed as NULL.
+ */
+ parse_xml_decl((xmlChar *) str, NULL, NULL, &encoding, NULL);
/*
* Parse the data to check if it is well-formed XML data. Assume
* that ERROR occurred if parsing failed.
*/
- doc = xml_parse(result, false, true);
+ doc = xml_parse(result, false, true, encoding);
xmlFreeDoc(doc);
+ /*
+ * Convert the received bytes to the database encoding. The source
+ * encoding is the one declared in the data, or UTF-8 if none was
+ * declared.
+ */
+ newstr = (char *) pg_do_encoding_conversion((unsigned char *) str,
+ nbytes,
+ encoding ? pg_char_to_encoding((char *) encoding) : PG_UTF8,
+ GetDatabaseEncoding());
+
+ pfree(str);
+
+ /*
+ * A result pointer different from the input means a conversion took
+ * place, so the datum built above holds unconverted bytes and must be
+ * rebuilt from the converted string.
+ */
+ if (newstr != str)
+ {
+ /* result was obtained from palloc(), so release it with pfree() */
+ pfree(result);
+
+ nbytes = strlen(newstr);
+
+ result = palloc(nbytes + VARHDRSZ);
+ VARATT_SIZEP(result) = nbytes + VARHDRSZ;
+ memcpy(VARDATA(result), newstr, nbytes);
+
+ /* the converted copy is no longer needed once it has been copied */
+ pfree(newstr);
+ }
+
PG_RETURN_XML_P(result);
#else
NO_XML_SUPPORT();
xml_send(PG_FUNCTION_ARGS)
{
xmltype *x = PG_GETARG_XML_P(0);
+ char *outval = xml_out_internal(x, pg_get_client_encoding());
StringInfoData buf;
pq_begintypsend(&buf);
- pq_sendbytes(&buf, VARDATA(x), VARSIZE(x) - VARHDRSZ);
+ /*
+ * Send the text form built for the client encoding. pq_sendtext() is
+ * given an explicit length so that no terminating NUL byte becomes
+ * part of the binary value; the call being replaced likewise sent
+ * exactly the payload bytes.
+ */
+ pq_sendtext(&buf, outval, strlen(outval));
PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
}
}
+/*
+ * Build an xml datum from a NUL-terminated C string.
+ *
+ * The result is a newly palloc'd buffer of strlen(string) + VARHDRSZ
+ * bytes: the size is recorded with VARATT_SIZEP and the string's bytes
+ * (without the terminating NUL) are copied into the data area.
+ */
+static xmltype *
+cstring_to_xmltype(const char *string)
+{
+ int32 total_size = strlen(string) + VARHDRSZ;
+ xmltype *xml = palloc(total_size);
+
+ VARATT_SIZEP(xml) = total_size;
+ memcpy(VARDATA(xml), string, total_size - VARHDRSZ);
+
+ return xml;
+}
+
+
static xmltype *
xmlBuffer_to_xmltype(xmlBufferPtr buf)
{
{
#ifdef USE_LIBXML
text *arg = PG_GETARG_TEXT_P(0);
- int len = VARATT_SIZEP(arg) - VARHDRSZ;
+ int len = VARSIZE(arg) - VARHDRSZ;
StringInfoData buf;
int i;
#ifdef USE_LIBXML
xmlDocPtr doc;
- doc = xml_parse(data, is_document, preserve_whitespace);
+ doc = xml_parse(data, is_document, preserve_whitespace, NULL);
xmlFreeDoc(doc);
return (xmltype *) data;
xmlBufferPtr buffer;
xmlSaveCtxtPtr save;
- doc = xml_parse((text *) data, true, true);
+ doc = xml_parse((text *) data, true, true, NULL);
if (version)
doc->version = xmlStrdup(xml_text2xmlChar(version));
}
buffer = xmlBufferCreate();
- save = xmlSaveToBuffer(buffer, NULL, 0);
+ save = xmlSaveToBuffer(buffer, "UTF-8", 0);
xmlSaveDoc(save, doc);
xmlSaveClose(save);
xmlFreeDoc(doc);
- result = xmlBuffer_to_xmltype(buffer);
+ result = cstring_to_xmltype((char *) pg_do_encoding_conversion((unsigned char *) xmlBufferContent(buffer),
+ xmlBufferLength(buffer),
+ PG_UTF8,
+ GetDatabaseEncoding()));
xmlBufferFree(buffer);
return result;
#else
PG_TRY();
{
- doc = xml_parse((text *) arg, true, true);
+ doc = xml_parse((text *) arg, true, true, NULL);
result = true;
}
PG_CATCH();
#define SKIP_XML_SPACE(p) while (xmlIsBlank_ch(*(p))) (p)++
static int
-parse_xml_decl(const xmlChar *str, size_t *len, xmlChar **encoding, int *standalone)
+parse_xml_decl(const xmlChar *str, size_t *lenp, xmlChar **version, xmlChar **encoding, int *standalone)
{
const xmlChar *p;
const xmlChar *save_p;
+ size_t len; /* bytes consumed by the declaration: distance from str to the final p */
p = str;
+ if (version)
+ *version = NULL;
+ if (encoding)
+ *encoding = NULL;
+ if (standalone)
+ *standalone = -1;
+ /*
+ * Every output pointer (version, encoding, standalone, lenp) is
+ * optional and is written only when the caller passed it as non-NULL.
+ * The three value outputs were reset above (NULL for the strings, -1
+ * for standalone) so that every exit path leaves them defined.
+ */
if (xmlStrncmp(p, (xmlChar *)"
goto finished;
return XML_ERR_VERSION_MISSING;
p += 1;
SKIP_XML_SPACE(p);
- if (xmlStrncmp(p, (xmlChar *)"'1.0'", 5) != 0 && xmlStrncmp(p, (xmlChar *)"\"1.0\"", 5) != 0)
+
+ if (*p == '\'' || *p == '"')
+ {
+ const xmlChar *q;
+
+ q = xmlStrchr(p + 1, *p);
+ if (!q)
+ return XML_ERR_VERSION_MISSING;
+
+ if (version)
+ *version = xmlStrndup(p + 1, q - p - 1);
+ p = q + 1;
+ }
+ else
return XML_ERR_VERSION_MISSING;
- p += 5;
/* encoding */
save_p = p;
if (!q)
return XML_ERR_MISSING_ENCODING;
+ if (encoding)
*encoding = xmlStrndup(p + 1, q - p - 1);
p = q + 1;
}
else
{
p = save_p;
- *encoding = NULL;
}
/* standalone */
else
{
p = save_p;
- *standalone = -1;
}
SKIP_XML_SPACE(p);
p += 2;
finished:
- if (len)
- *len = (p - str);
+ len = p - str;
+ /*
+ * Scan the first len bytes of str (the declaration itself) and fail
+ * with XML_ERR_INVALID_CHAR if any byte value is above 127.
+ */
+ for (p = str; p < str + len; p++)
+ if (*p > 127)
+ return XML_ERR_INVALID_CHAR;
+
+ if (lenp)
+ *lenp = len;
+
return XML_ERR_OK;
}
* TODO what about internal URI for docs? (see PG_XML_DEFAULT_URI below)
*/
static xmlDocPtr
-xml_parse(text *data, bool is_document, bool preserve_whitespace)
+xml_parse(text *data, bool is_document, bool preserve_whitespace, xmlChar *encoding)
{
- int res_code;
int32 len;
xmlChar *string;
+ xmlChar *utf8string;
xmlParserCtxtPtr ctxt = NULL;
xmlDocPtr doc = NULL;
len = VARSIZE(data) - VARHDRSZ; /* will be useful later */
string = xml_text2xmlChar(data);
+ utf8string = pg_do_encoding_conversion(string,
+ len,
+ encoding
+ ? pg_char_to_encoding((char *) encoding)
+ : GetDatabaseEncoding(),
+ PG_UTF8);
+
xml_init();
/* We use a PG_TRY block to ensure libxml is cleaned up on error */
* As for external DTDs, we try to support them too, (see
* SQL/XML:10.16.7.e)
*/
- doc = xmlCtxtReadMemory(ctxt, (char *) string, len,
- PG_XML_DEFAULT_URI, NULL,
+ doc = xmlCtxtReadDoc(ctxt, utf8string,
+ PG_XML_DEFAULT_URI,
+ "UTF-8",
XML_PARSE_NOENT | XML_PARSE_DTDATTR
| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
if (doc == NULL)
}
else
{
+ int res_code;
size_t count;
- xmlChar *encoding = NULL;
+ xmlChar *version = NULL;
int standalone = -1;
doc = xmlNewDoc(NULL);
- res_code = parse_xml_decl(string, &count, &encoding, &standalone);
+ res_code = parse_xml_decl(utf8string, &count, &version, NULL, &standalone);
- /* TODO resolve: xmlParseBalancedChunkMemory assumes that string is UTF8 encoded! */
if (res_code == 0)
- res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, string + count, NULL);
+ res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, utf8string + count, NULL);
if (res_code != 0)
xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
"invalid XML content", res_code);
- doc->encoding = encoding;
+ doc->version = xmlStrdup(version);
+ doc->encoding = xmlStrdup((xmlChar *) "UTF-8");
doc->standalone = standalone;
}
- /* TODO encoding issues
- * (thoughts:
- * CASE:
- * - XML data has explicit encoding attribute in its prolog
- * - if not, assume that enc. of XML data is the same as client's one
- *
- * The common rule is to accept the XML data only if its encoding
- * is the same as encoding of the storage (server's). The other possible
- * option is to accept all the docs, but DO TRANSFORMATION and, if needed,
- * change the prolog.
- *
- * I think I'd stick the first way (for the 1st version),
- * it's much simplier (less errors...)
- * ) */
- /* ... */
-
if (ctxt)
xmlFreeParserCtxt(ctxt);
xmlCleanupParser();
SELECT xmlroot(xml '', version no value, standalone no value);
xmlroot
------------------------
-
+---------
(1 row)