-
+
SQL Syntax
ampersands. The length limitation still applies.
+ identifiers A variant of quoted
+ identifiers allows including escaped Unicode characters identified
+ by their code points. This variant starts
+ with U& (upper or lower case U followed by
+ ampersand) immediately before the opening double quote, without
+ any spaces in between, for example U&"foo".
+ (Note that this creates an ambiguity with the
+ operator &. Use spaces around the operator to
+ avoid this problem.) Inside the quotes, Unicode characters can be
+ specified in escaped form by writing a backslash followed by the
+ four-digit hexadecimal code point number or alternatively a
+ backslash followed by a plus sign followed by a six-digit
+ hexadecimal code point number. For example, the
+ identifier "data" could be written as
+U&"d\0061t\+000061"
+
+ The following less trivial example writes the Russian
+ word slon
(elephant) in Cyrillic letters:
+U&"\0441\043B\043E\043D"
+
+
+
+ If a different escape character than backslash is desired, it can
+ be specified using the UESCAPE
+ clause after the string, for example:
+U&"d!0061t!+000061" UESCAPE '!'
+
+ The escape character can be any single character other than a
+ hexadecimal digit, the plus sign, a single quote, a double quote,
+ or a whitespace character. Note that the escape character is
+ written in single quotes, not double quotes.
+
+
+ To include the escape character in the identifier literally, write
+ it twice.
+
+
+ The Unicode escape syntax works only when the server encoding is
+ UTF8. When other server encodings are used, only code points in
+ the ASCII range (up to \007F) can be specified.
+
+
Quoting an identifier also makes it case-sensitive, whereas
unquoted names are always folded to lower case. For example, the
write two adjacent single quotes, e.g.
'Dianne''s horse'.
Note that this is not the same as a double-quote
- character (").
+ character (").
following the standard.)
+
-
+
+
String Constants with C-Style Escapes
+
+
-
+ zone="sql-syntax-strings-escape">
+
PostgreSQL also accepts
"escape"
string constants, which are an extension to the SQL standard.
An escape string constant is specified by writing the letter
Within an escape string, a backslash character (\) begins a
C-like backslash escape sequence, in which the combination
of backslash and following character(s) represent a special byte
- value:
+ value, as shown in the Backslash Escape Sequences table.
+
Backslash Escape Sequences
- It is your responsibility that the byte sequences you create are
- valid characters in the server character set encoding. Any other
+ Any other
character following a backslash is taken literally. Thus, to
include a backslash character, write two backslashes (\\).
Also, a single quote can be included in an escape string by writing
\', in addition to the normal way of ''.
+ It is your responsibility that the byte sequences you create are
+ valid characters in the server character set encoding. When the
+ server encoding is UTF-8, then the alternative Unicode escape
+ syntax, explained in String Constants with Unicode Escapes,
+ should be used instead. (The alternative would be doing the
+ UTF-8 encoding by hand and writing out the bytes, which would be
+ very cumbersome.)
+
+
If the configuration parameter
+
+
String Constants with Unicode Escapes
+
+
+ in string constants
+
+
+
PostgreSQL also supports another type
+ of escape syntax for strings that allows specifying arbitrary
+ Unicode characters by code point. A Unicode escape string
+ constant starts with U& (upper or lower case
+ letter U followed by ampersand) immediately before the opening
+ quote, without any spaces in between, for
+ example U&'foo'. (Note that this creates an
+ ambiguity with the operator &. Use spaces
+ around the operator to avoid this problem.) Inside the quotes,
+ Unicode characters can be specified in escaped form by writing a
+ backslash followed by the four-digit hexadecimal code point
+ number or alternatively a backslash followed by a plus sign
+ followed by a six-digit hexadecimal code point number. For
+ example, the string 'data' could be written as
+U&'d\0061t\+000061'
+
+ The following less trivial example writes the Russian
+ word slon
(elephant) in Cyrillic letters:
+U&'\0441\043B\043E\043D'
+
+
+
+ If a different escape character than backslash is desired, it can
+ be specified using the UESCAPE
+ clause after the string, for example:
+ U&'d!0061t!+000061' UESCAPE '!'
+
+ The escape character can be any single character other than a
+ hexadecimal digit, the plus sign, a single quote, a double quote,
+ or a whitespace character.
+
+
+ The Unicode escape syntax works only when the server encoding is
+ UTF8. When other server encodings are used, only code points in
+ the ASCII range (up to \007F) can be
+ specified.
+
+
+ To include the escape character in the string literally, write it
+ twice.
+
+
+
Dollar-Quoted String Constants
F381 Extended schema manipulation 03 ALTER TABLE statement: DROP CONSTRAINT clause YES
F382 Alter column data type YES
F391 Long identifiers YES
-F392 Unicode escapes in identifiers NO
-F393 Unicode escapes in literals NO
+F392 Unicode escapes in identifiers YES
+F393 Unicode escapes in literals YES
F394 Optional normal form specification NO
F401 Extended joined table YES
F401 Extended joined table 01 NATURAL JOIN YES
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.146 2008/09/01 20:42:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.147 2008/10/29 08:04:52 petere Exp $
*
*-------------------------------------------------------------------------
*/
static void addlit(char *ytext, int yleng);
static void addlitchar(unsigned char ychar);
static char *litbufdup(void);
+static char *litbuf_udeescape(unsigned char escape);
#define lexer_errposition() scanner_errposition(yylloc)
* standard quoted strings
* extended quoted strings (support backslash escape sequences)
* $foo$ quoted strings
+ * quoted identifier with Unicode escapes
+ * quoted string with Unicode escapes
*/
%x xb
%x xe
%x xq
%x xdolq
+%x xui
+%x xus
/*
* In order to make the world safe for Windows and Mac clients as well as
xddouble {dquote}{dquote}
xdinside [^"]+
+/* Unicode escapes */
+uescape [uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
+/* error rule to avoid backup */
+uescapefail ("-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU])
+
+/* Quoted identifier with Unicode escapes */
+xuistart [uU]&{dquote}
+xuistop1 {dquote}{whitespace}*{uescapefail}?
+xuistop2 {dquote}{whitespace}*{uescape}
+
+/* Quoted string with Unicode escapes */
+xusstart [uU]&{quote}
+xusstop1 {quote}{whitespace}*{uescapefail}?
+xusstop2 {quote}{whitespace}*{uescape}
+
+/* error rule to avoid backup */
+xufailed [uU]&
+
+
/* C-style comments
*
* The "extended comment" syntax closely resembles allowable operator syntax.
BEGIN(xe);
startlit();
}
+{xusstart} {
+ SET_YYLLOC();
+ BEGIN(xus);
+ startlit();
+ }
{quotestop} |
{quotefail} {
yyless(1);
yylval.str = litbufdup();
return SCONST;
}
-{xqdouble} {
+{xusstop1} {
+ /* throw back all but the quote */
+ yyless(1);
+ BEGIN(INITIAL);
+ yylval.str = litbuf_udeescape('\\');
+ return SCONST;
+ }
+{xusstop2} {
+ BEGIN(INITIAL);
+ yylval.str = litbuf_udeescape(yytext[yyleng-2]);
+ return SCONST;
+ }
+{xqdouble} {
addlitchar('\'');
}
-{xqinside} {
+,xus>{xqinside} {
addlit(yytext, yyleng);
}
{xeinside} {
if (IS_HIGHBIT_SET(c))
saw_high_bit = true;
}
-{quotecontinue} {
+,xus>{quotecontinue} {
/* ignore */
}
. {
/* This is only needed for \ just before EOF */
addlitchar(yytext[0]);
}
-<> { yyerror("unterminated quoted string"); }
+,xus><> { yyerror("unterminated quoted string"); }
{dolqdelim} {
SET_YYLLOC();
BEGIN(xd);
startlit();
}
+{xuistart} {
+ SET_YYLLOC();
+ BEGIN(xui);
+ startlit();
+ }
{xdstop} {
char *ident;
yylval.str = ident;
return IDENT;
}
-{xddouble} {
+{xuistop1} {
+ char *ident;
+
+ BEGIN(INITIAL);
+ if (literallen == 0)
+ yyerror("zero-length delimited identifier");
+ ident = litbuf_udeescape('\\');
+ if (literallen >= NAMEDATALEN)
+ truncate_identifier(ident, literallen, true);
+ yylval.str = ident;
+ /* throw back all but the quote */
+ yyless(1);
+ return IDENT;
+ }
+{xuistop2} {
+ char *ident;
+
+ BEGIN(INITIAL);
+ if (literallen == 0)
+ yyerror("zero-length delimited identifier");
+ ident = litbuf_udeescape(yytext[yyleng - 2]);
+ if (literallen >= NAMEDATALEN)
+ truncate_identifier(ident, literallen, true);
+ yylval.str = ident;
+ return IDENT;
+ }
+{xddouble} {
addlitchar('"');
}
-{xdinside} {
+,xui>{xdinside} {
addlit(yytext, yyleng);
}
-<> { yyerror("unterminated quoted identifier"); }
+<> { yyerror("unterminated quoted identifier"); }
+
+{xufailed} {
+ /* throw back all but the initial u/U */
+ yyless(1);
+ /* and treat it as {other} */
+ return yytext[0];
+ }
{typecast} {
SET_YYLLOC();
return new;
}
+/*
+ * hexval
+ *		Convert one hexadecimal digit character to its numeric value.
+ *
+ * Accepts '0'-'9', 'a'-'f' and 'A'-'F'.  Any other character is reported
+ * with elog(ERROR, ...).
+ */
+static int
+hexval(unsigned char c)
+{
+	int			digit;
+
+	if ('0' <= c && c <= '9')
+		digit = c - '0';
+	else if ('a' <= c && c <= 'f')
+		digit = 0xA + (c - 'a');
+	else if ('A' <= c && c <= 'F')
+		digit = 0xA + (c - 'A');
+	else
+	{
+		elog(ERROR, "invalid hexadecimal digit");
+		digit = 0;				/* not reached */
+	}
+
+	return digit;
+}
+
+/*
+ * check_unicode_value
+ *		Complain about an escaped code point above 0x7F when the database
+ *		encoding is not UTF8.
+ *
+ * Nothing is checked when GetDatabaseEncoding() returns PG_UTF8.  Otherwise
+ * a code point above 0x7F is reported through yyerror(), after advancing
+ * yylloc by the offset of "loc" within literalbuf plus the three characters
+ * of the U&" prefix.
+ */
+static void
+check_unicode_value(pg_wchar c, char * loc)
+{
+	if (GetDatabaseEncoding() != PG_UTF8 && c > 0x7F)
+	{
+		yylloc += (char *) loc - literalbuf + 3;	/* 3 for U&" */
+		yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8");
+	}
+}
+
+/*
+ * litbuf_udeescape
+ *		Build a palloc'd copy of literalbuf with Unicode escapes expanded.
+ *
+ * "escape" is the escape character in effect.  Inside the literal:
+ *	- two escape characters in a row produce one literal escape character;
+ *	- escape followed by 4 hex digits denotes a code point;
+ *	- escape, '+', then 6 hex digits denotes a code point;
+ *	- any other use of the escape character is reported via yyerror().
+ * Each code point is checked with check_unicode_value() and written with
+ * unicode_to_utf8().  The NUL-terminated result is passed through
+ * pg_verifymbstr() before being returned.
+ *
+ * Bytes taken from literalbuf are cast to unsigned char before they are
+ * handed to isxdigit() or compared with "escape": plain char may be signed,
+ * and passing a negative value to a <ctype.h> function is undefined, while a
+ * signed comparison would never match an escape character above 0x7F.
+ */
+static char *
+litbuf_udeescape(unsigned char escape)
+{
+	char	   *new;
+	char	   *in,
+			   *out;
+
+	/* Characters that are part of the escape syntax cannot be the escape. */
+	if (isxdigit(escape)
+		|| escape == '+'
+		|| escape == '\''
+		|| escape == '"'
+		|| scanner_isspace(escape))
+	{
+		yylloc += literallen + yyleng + 1;
+		yyerror("invalid Unicode escape character");
+	}
+
+	/*
+	 * This relies on the subtle assumption that a UTF-8 expansion
+	 * cannot be longer than its escaped representation.
+	 */
+	new = palloc(literallen + 1);
+
+	in = literalbuf;
+	out = new;
+	while (*in)
+	{
+		if ((unsigned char) in[0] == escape)
+		{
+			if ((unsigned char) in[1] == escape)
+			{
+				/* doubled escape character stands for itself */
+				*out++ = escape;
+				in += 2;
+			}
+			else if (isxdigit((unsigned char) in[1])
+					 && isxdigit((unsigned char) in[2])
+					 && isxdigit((unsigned char) in[3])
+					 && isxdigit((unsigned char) in[4]))
+			{
+				/* escape + XXXX */
+				pg_wchar unicode = hexval(in[1]) * 16*16*16 + hexval(in[2]) * 16*16 + hexval(in[3]) * 16 + hexval(in[4]);
+				check_unicode_value(unicode, in);
+				unicode_to_utf8(unicode, (unsigned char *) out);
+				in += 5;
+				out += pg_mblen(out);
+			}
+			else if (in[1] == '+'
+					 && isxdigit((unsigned char) in[2])
+					 && isxdigit((unsigned char) in[3])
+					 && isxdigit((unsigned char) in[4])
+					 && isxdigit((unsigned char) in[5])
+					 && isxdigit((unsigned char) in[6])
+					 && isxdigit((unsigned char) in[7]))
+			{
+				/* escape + '+' + XXXXXX */
+				pg_wchar unicode = hexval(in[2]) * 16*16*16*16*16 + hexval(in[3]) * 16*16*16*16 + hexval(in[4]) * 16*16*16
+									+ hexval(in[5]) * 16*16 + hexval(in[6]) * 16 + hexval(in[7]);
+				check_unicode_value(unicode, in);
+				unicode_to_utf8(unicode, (unsigned char *) out);
+				in += 8;
+				out += pg_mblen(out);
+			}
+			else
+			{
+				yylloc += in - literalbuf + 3;	/* 3 for U&" */
+				yyerror("invalid Unicode escape value");
+			}
+		}
+		else
+			*out++ = *in++;
+	}
+
+	*out = '\0';
+	pg_verifymbstr(new, out - new, false);
+	return new;
+}
static unsigned char
unescape_single_char(unsigned char c)
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.79 2008/10/14 17:12:33 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.80 2008/10/29 08:04:53 petere Exp $
*
*-------------------------------------------------------------------------
*/
{
static unsigned char utf8string[5]; /* need trailing zero */
- if (c <= 0x7F)
- {
- utf8string[0] = c;
- }
- else if (c <= 0x7FF)
- {
- utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
- utf8string[1] = 0x80 | (c & 0x3F);
- }
- else if (c <= 0xFFFF)
- {
- utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
- utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
- utf8string[2] = 0x80 | (c & 0x3F);
- }
- else
- {
- utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
- utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
- utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
- utf8string[3] = 0x80 | (c & 0x3F);
- }
+ unicode_to_utf8(c, utf8string);
return (char *) pg_do_encoding_conversion(utf8string,
pg_mblen((char *) utf8string),
/*
* conversion functions between pg_wchar and multibyte streams.
* Tatsuo Ishii
- * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.67 2008/10/27 19:37:22 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.68 2008/10/29 08:04:53 petere Exp $
*
*/
/* can be used in either frontend or backend */
return cnt;
}
+
+/*
+ * unicode_to_utf8
+ *		Map a Unicode code point to UTF-8.
+ *
+ * Writes one byte for c <= 0x7F, two for c <= 0x7FF, three for
+ * c <= 0xFFFF and four otherwise, so utf8string must have 4 bytes of
+ * space allocated.  No terminating zero byte is written.  Returns
+ * utf8string.
+ */
+unsigned char *
+unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
+{
+	unsigned char *p = utf8string;
+
+	if (c <= 0x7F)
+		*p = c;
+	else if (c <= 0x7FF)
+	{
+		*p++ = 0xC0 | ((c >> 6) & 0x1F);
+		*p = 0x80 | (c & 0x3F);
+	}
+	else if (c <= 0xFFFF)
+	{
+		*p++ = 0xE0 | ((c >> 12) & 0x0F);
+		*p++ = 0x80 | ((c >> 6) & 0x3F);
+		*p = 0x80 | (c & 0x3F);
+	}
+	else
+	{
+		*p++ = 0xF0 | ((c >> 18) & 0x07);
+		*p++ = 0x80 | ((c >> 12) & 0x3F);
+		*p++ = 0x80 | ((c >> 6) & 0x3F);
+		*p = 0x80 | (c & 0x3F);
+	}
+
+	return utf8string;
+}
+
+
/*
* Return the byte length of a UTF8 character pointed to by s
*
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.25 2008/05/09 15:36:31 petere Exp $
+ * $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.26 2008/10/29 08:04:53 petere Exp $
*
*-------------------------------------------------------------------------
*/
* standard quoted strings
* extended quoted strings (support backslash escape sequences)
* $foo$ quoted strings
+ * quoted identifier with Unicode escapes
+ * quoted string with Unicode escapes
*/
%x xb
%x xe
%x xq
%x xdolq
+%x xui
+%x xus
/* Additional exclusive states for psql only: lex backslash commands */
%x xslashcmd
%x xslasharg
xddouble {dquote}{dquote}
xdinside [^"]+
+/* Unicode escapes */
+uescape [uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
+/* error rule to avoid backup */
+uescapefail ("-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU])
+
+/* Quoted identifier with Unicode escapes */
+xuistart [uU]&{dquote}
+xuistop1 {dquote}{whitespace}*{uescapefail}?
+xuistop2 {dquote}{whitespace}*{uescape}
+
+/* Quoted string with Unicode escapes */
+xusstart [uU]&{quote}
+xusstop1 {quote}{whitespace}*{uescapefail}?
+xusstop2 {quote}{whitespace}*{uescape}
+
+/* error rule to avoid backup */
+xufailed [uU]&
+
+
/* C-style comments
*
* The "extended comment" syntax closely resembles allowable operator syntax.
BEGIN(xe);
ECHO;
}
+{xusstart} {
+ BEGIN(xus);
+ ECHO;
+ }
{quotestop} |
{quotefail} {
yyless(1);
BEGIN(INITIAL);
ECHO;
}
-{xqdouble} {
+{xusstop1} {
+ yyless(1);
+ BEGIN(INITIAL);
+ ECHO;
+ }
+{xusstop2} {
+ BEGIN(INITIAL);
+ ECHO;
+ }
+{xqdouble} {
ECHO;
}
-{xqinside} {
+,xus>{xqinside} {
ECHO;
}
{xeinside} {
{xehexesc} {
ECHO;
}
-{quotecontinue} {
+,xus>{quotecontinue} {
ECHO;
}
. {
BEGIN(xd);
ECHO;
}
+{xuistart} {
+ BEGIN(xui);
+ ECHO;
+ }
{xdstop} {
BEGIN(INITIAL);
ECHO;
}
-{xddouble} {
+{xuistop1} {
+ yyless(1);
+ BEGIN(INITIAL);
+ ECHO;
+ }
+{xuistop2} {
+ BEGIN(INITIAL);
ECHO;
}
-{xdinside} {
+{xddouble} {
+ ECHO;
+ }
+{xdinside} {
+ ECHO;
+ }
+
+{xufailed} {
+ /* throw back all but the initial u/U */
+ yyless(1);
ECHO;
}
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.79 2008/06/18 18:42:54 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.80 2008/10/29 08:04:53 petere Exp $
*
* NOTES
* This is used both by the backend and by libpq, but should not be
extern int pg_valid_client_encoding(const char *name);
extern int pg_valid_server_encoding(const char *name);
+extern unsigned char *unicode_to_utf8(pg_wchar c, unsigned char *utf8string);
extern int pg_utf_mblen(const unsigned char *);
extern unsigned char *pg_do_encoding_conversion(unsigned char *src, int len,
int src_encoding,
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.166 2008/05/20 23:17:32 meskes Exp $
+ * $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.167 2008/10/29 08:04:53 petere Exp $
*
*-------------------------------------------------------------------------
*/
* extended quoted strings (support backslash escape sequences)
* national character quoted strings
* $foo$ quoted strings
+ * quoted identifier with Unicode escapes
+ * quoted string with Unicode escapes
*/
%x xb
%x xdolq
%x xcond
%x xskip
+%x xui
+%x xus
/* Bit string
*/
xddouble {dquote}{dquote}
xdinside [^"]+
+/* Unicode escapes */
+/* (The ecpg scanner is not backup-free, so the fail rules in scan.l are not needed here, but could be added if desired.) */
+uescape [uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
+
+/* Quoted identifier with Unicode escapes */
+xuistart [uU]&{dquote}
+xuistop {dquote}({whitespace}*{uescape})?
+
+/* Quoted string with Unicode escapes */
+xusstart [uU]&{quote}
+xusstop {quote}({whitespace}*{uescape})?
+
/* special stuff for C strings */
xdcqq \\\\
xdcqdq \\\"
BEGIN(xe);
startlit();
}
+{xusstart} {
+ token_start = yytext;
+ state_before = YYSTATE;
+ BEGIN(xus);
+ startlit();
+ addlit(yytext, yyleng);
+ }
{quotestop} |
{quotefail} {
yyless(1);
yylval.str = mm_strdup(literalbuf);
return NCONST;
}
-{xqdouble} { addlitchar('\''); }
+{xusstop} {
+ addlit(yytext, yyleng);
+ BEGIN(state_before);
+ yylval.str = mm_strdup(literalbuf);
+ return UCONST;
+ }
+{xqdouble} { addlitchar('\''); }
{xqcquote} {
addlitchar('\\');
addlitchar('\'');
}
-{xqinside} { addlit(yytext, yyleng); }
+,xus>{xqinside} { addlit(yytext, yyleng); }
{xeinside} { addlit(yytext, yyleng); }
{xeescape} { addlit(yytext, yyleng); }
{xeoctesc} { addlit(yytext, yyleng); }
{xehexesc} { addlit(yytext, yyleng); }
-{quotecontinue} { /* ignore */ }
+,xus>{quotecontinue} { /* ignore */ }
. {
/* This is only needed for \ just before EOF */
addlitchar(yytext[0]);
}
-<> { mmerror(PARSE_ERROR, ET_FATAL, "unterminated quoted string"); }
+,xus><> { mmerror(PARSE_ERROR, ET_FATAL, "unterminated quoted string"); }
{dolqfailed} {
/* throw back all but the initial "$" */
yyless(1);
BEGIN(xd);
startlit();
}
+{xuistart} {
+ state_before = YYSTATE;
+ BEGIN(xui);
+ startlit();
+ addlit(yytext, yyleng);
+ }
{xdstop} {
BEGIN(state_before);
if (literallen == 0)
yylval.str = mm_strdup(literalbuf);
return CSTRING;
}
-{xddouble} { addlitchar('"'); }
-{xdinside} { addlit(yytext, yyleng); }
-<> { mmerror(PARSE_ERROR, ET_FATAL, "unterminated quoted identifier"); }
+{xuistop} {
+ BEGIN(state_before);
+ if (literallen == 2) /* "U&" */
+ mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier");
+ /* The backend will truncate the identifier here. We do not as it does not change the result. */
+ addlit(yytext, yyleng);
+ yylval.str = mm_strdup(literalbuf);
+ return UIDENT;
+ }
+{xddouble} { addlitchar('"'); }
+{xdinside} { addlit(yytext, yyleng); }
+<> { mmerror(PARSE_ERROR, ET_FATAL, "unterminated quoted identifier"); }
{xdstart} {
state_before = YYSTATE;
BEGIN(xdc);
-/* $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/preproc.y,v 1.379 2008/10/28 14:09:45 petere Exp $ */
+/* $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/preproc.y,v 1.380 2008/10/29 08:04:53 petere Exp $ */
/* Copyright comment */
%{
/* Special token types, not actually keywords - see the "lex" file */
%token IDENT SCONST Op CSTRING CVARIABLE CPP_LINE IP BCONST
-%token XCONST DOLCONST ECONST NCONST
+%token XCONST DOLCONST ECONST NCONST UCONST UIDENT
%token ICONST PARAM
%token FCONST
$$[strlen($1)+3]='\0';
free($1);
}
+ | UCONST
+ {
+ $$ = $1;
+ }
| DOLCONST
{
$$ = $1;
;
ident: IDENT { $$ = $1; }
| CSTRING { $$ = make3_str(make_str("\""), $1, make_str("\"")); }
+ | UIDENT { $$ = $1; }
;
quoted_ident_stringvar: name
test: preproc/comment
test: preproc/define
test: preproc/init
+test: preproc/strings
test: preproc/type
test: preproc/variable
test: preproc/whenever
test: preproc/comment
test: preproc/define
test: preproc/init
+test: preproc/strings
test: preproc/type
test: preproc/variable
test: preproc/whenever
--- /dev/null
+/* Processed by ecpg (regression mode) */
+/* These include files are added by the preprocessor */
+#include
+#include
+#include
+/* End of automatic include section */
+#define ECPGdebug(X,Y) ECPGdebug((X)+100,(Y))
+
+#line 1 "strings.pgc"
+#include
+
+
+#line 1 "regression.h"
+
+
+
+
+
+
+#line 3 "strings.pgc"
+
+
+/* exec sql begin declare section */
+
+
+#line 6 "strings.pgc"
+ char * s1 , * s2 , * s3 , * s4 , * s5 , * s6 ;
+/* exec sql end declare section */
+#line 7 "strings.pgc"
+
+
+int main(void)
+{
+ ECPGdebug(1, stderr);
+
+ { ECPGconnect(__LINE__, 0, "regress1" , NULL, NULL , NULL, 0); }
+#line 13 "strings.pgc"
+
+
+ { ECPGdo(__LINE__, 0, 1, NULL, 0, ECPGst_normal, "select 'abcdef' , N'abcdef' as foo , E'abc\\bdef' as \"foo\" , U&'d\\0061t\\0061' as U&\"foo\" , U&'d!+000061t!+000061' uescape '!' , $foo$abc$def$foo$ ", ECPGt_EOIT,
+ ECPGt_char,&(s1),(long)0,(long)1,(1)*sizeof(char),
+ ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L,
+ ECPGt_char,&(s2),(long)0,(long)1,(1)*sizeof(char),
+ ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L,
+ ECPGt_char,&(s3),(long)0,(long)1,(1)*sizeof(char),
+ ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L,
+ ECPGt_char,&(s4),(long)0,(long)1,(1)*sizeof(char),
+ ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L,
+ ECPGt_char,&(s5),(long)0,(long)1,(1)*sizeof(char),
+ ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L,
+ ECPGt_char,&(s6),(long)0,(long)1,(1)*sizeof(char),
+ ECPGt_NO_INDICATOR, NULL , 0L, 0L, 0L, ECPGt_EORT);}
+#line 21 "strings.pgc"
+
+
+ printf("%s %s %s %s %s %s\n", s1, s2, s3, s4, s5, s6);
+
+ { ECPGdisconnect(__LINE__, "CURRENT");}
+#line 25 "strings.pgc"
+
+ exit (0);
+}
--- /dev/null
+[NO_PID]: ECPGdebug: set to 1
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ECPGconnect: opening database regress1 on port
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_execute on line 15: query: select 'abcdef' , N'abcdef' as foo , E'abc\bdef' as "foo" , U&'d\0061t\0061' as U&"foo" , U&'d!+000061t!+000061' uescape '!' , $foo$abc$def$foo$ ; with 0 parameter(s) on connection regress1
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_execute on line 15: using PQexec
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_execute on line 15: correctly got 1 tuples with 6 fields
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_store_result on line 15: allocating memory for 1 tuples
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_get_data on line 15: RESULT: abcdef offset: -1; array: yes
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_store_result on line 15: allocating memory for 1 tuples
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_get_data on line 15: RESULT: abcdef offset: -1; array: yes
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_store_result on line 15: allocating memory for 1 tuples
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_get_data on line 15: RESULT: abc\bdef offset: -1; array: yes
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_store_result on line 15: allocating memory for 1 tuples
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_get_data on line 15: RESULT: data offset: -1; array: yes
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_store_result on line 15: allocating memory for 1 tuples
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_get_data on line 15: RESULT: data offset: -1; array: yes
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_store_result on line 15: allocating memory for 1 tuples
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_get_data on line 15: RESULT: abc$def offset: -1; array: yes
+[NO_PID]: sqlca: code: 0, state: 00000
+[NO_PID]: ecpg_finish: connection regress1 closed
+[NO_PID]: sqlca: code: 0, state: 00000
--- /dev/null
+abcdef abcdef abc\bdef data data abc$def
comment comment.c \
define define.c \
init init.c \
+ strings strings.c \
type type.c \
variable variable.c \
whenever whenever.c
--- /dev/null
+#include
+
+exec sql include ../regression;
+
+exec sql begin declare section;
+char *s1, *s2, *s3, *s4, *s5, *s6;
+exec sql end declare section;
+
+int main(void)
+{
+ ECPGdebug(1, stderr);
+
+ exec sql connect to REGRESSDB1;
+
+ exec sql select 'abcdef',
+ N'abcdef' AS foo,
+ E'abc\bdef' AS "foo",
+ U&'d\0061t\0061' AS U&"foo",
+ U&'d!+000061t!+000061' uescape '!',
+ $foo$abc$def$foo$
+ into :s1, :s2, :s3, :s4, :s5, :s6;
+
+ printf("%s %s %s %s %s %s\n", s1, s2, s3, s4, s5, s6);
+
+ exec sql disconnect;
+ exit (0);
+}
ERROR: syntax error at or near "' - third line'"
LINE 3: ' - third line'
^
+-- Unicode escapes
+SELECT U&'d\0061t\+000061' AS U&"d\0061t\+000061";
+ data
+------
+ data
+(1 row)
+
+SELECT U&'d!0061t\+000061' UESCAPE '!' AS U&"d*0061t\+000061" UESCAPE '*';
+ dat\+000061
+-------------
+ dat\+000061
+(1 row)
+
+SELECT U&'wrong: \061';
+ERROR: invalid Unicode escape value at or near "\061'"
+LINE 1: SELECT U&'wrong: \061';
+ ^
+SELECT U&'wrong: \+0061';
+ERROR: invalid Unicode escape value at or near "\+0061'"
+LINE 1: SELECT U&'wrong: \+0061';
+ ^
+SELECT U&'wrong: +0061' UESCAPE '+';
+ERROR: invalid Unicode escape character at or near "+'"
+LINE 1: SELECT U&'wrong: +0061' UESCAPE '+';
+ ^
--
-- test conversions between various string types
-- E021-10 implicit casting among the character data types
' - third line'
AS "Illegal comment within continuation";
+-- Unicode escapes
+SELECT U&'d\0061t\+000061' AS U&"d\0061t\+000061";
+SELECT U&'d!0061t\+000061' UESCAPE '!' AS U&"d*0061t\+000061" UESCAPE '*';
+
+SELECT U&'wrong: \061';
+SELECT U&'wrong: \+0061';
+SELECT U&'wrong: +0061' UESCAPE '+';
+
--
-- test conversions between various string types
-- E021-10 implicit casting among the character data types