Fix up some oversights in psql's Unicode-escape support.
author: Tom Lane
Wed, 27 Oct 2010 02:23:04 +0000 (22:23 -0400)
committer: Tom Lane
Wed, 27 Oct 2010 02:25:19 +0000 (22:25 -0400)
Original patch failed to include new exclusive states in a switch that
needed to include them; and also was guilty of very fuzzy thinking
about how to handle error cases.  Per bug #5729 from Alan Choi.

src/bin/psql/psqlscan.l

index 0651fe26513a11891d4f637b9417ae9e53413ccb..7942fe5c4586d820d984af06c65268a9625c186c 100644 (file)
@@ -120,7 +120,6 @@ static bool var_is_current_source(PsqlScanState state, const char *varname);
 static YY_BUFFER_STATE prepare_buffer(const char *txt, int len,
                                      char **txtcopy);
 static void emit(const char *txt, int len);
-static bool is_utf16_surrogate_first(uint32 c);
 static void escape_variable(bool as_ident);
 
 #define ECHO emit(yytext, yyleng)
@@ -163,7 +162,11 @@ static void escape_variable(bool as_ident);
  *   $foo$ quoted strings
  *   quoted identifier with Unicode escapes
  *   quoted string with Unicode escapes
- *   Unicode surrogate pair in extended quoted string
+ *
+ * Note: we intentionally don't mimic the backend's <xeu> state; we have
+ * no need to distinguish it from <xe> state, and no good way to get out
+ * of it in error cases.  The backend just throws yyerror() in those
+ * cases, but that's not an option here.
  */
 
 %x xb
@@ -175,7 +178,6 @@ static void escape_variable(bool as_ident);
 %x xdolq
 %x xui
 %x xus
-%x xeu
 /* Additional exclusive states for psql only: lex backslash commands */
 %x xslashcmd
 %x xslasharg
@@ -529,19 +531,9 @@ other          .
                    ECHO;
                }
 <xe>{xeunicode} {
-                   uint32 c = strtoul(yytext+2, NULL, 16);
-
-                   if (is_utf16_surrogate_first(c))
-                       BEGIN(xeu);
-                   ECHO;
-               }
-<xeu>{xeunicode} {
-                   BEGIN(xe);
                    ECHO;
                }
-<xeu>.         { ECHO; }
-<xeu>\n            { ECHO; }
-<xe,xeu>{xeunicodefail}    {
+<xe>{xeunicodefail}    {
                    ECHO;
                }
 <xe>{xeescape}  {
@@ -1242,6 +1234,7 @@ psql_scan(PsqlScanState state,
        case LEXRES_EOL:        /* end of input */
            switch (state->start_state)
            {
+               /* This switch must cover all non-slash-command states. */
                case INITIAL:
                    if (state->paren_depth > 0)
                    {
@@ -1276,11 +1269,11 @@ psql_scan(PsqlScanState state,
                    result = PSCAN_INCOMPLETE;
                    *prompt = PROMPT_SINGLEQUOTE;
                    break;
-               case xq:
+               case xe:
                    result = PSCAN_INCOMPLETE;
                    *prompt = PROMPT_SINGLEQUOTE;
                    break;
-               case xe:
+               case xq:
                    result = PSCAN_INCOMPLETE;
                    *prompt = PROMPT_SINGLEQUOTE;
                    break;
@@ -1288,6 +1281,14 @@ psql_scan(PsqlScanState state,
                    result = PSCAN_INCOMPLETE;
                    *prompt = PROMPT_DOLLARQUOTE;
                    break;
+               case xui:
+                   result = PSCAN_INCOMPLETE;
+                   *prompt = PROMPT_DOUBLEQUOTE;
+                   break;
+               case xus:
+                   result = PSCAN_INCOMPLETE;
+                   *prompt = PROMPT_SINGLEQUOTE;
+                   break;
                default:
                    /* can't get here */
                    fprintf(stderr, "invalid YY_START\n");
@@ -1814,12 +1815,6 @@ emit(const char *txt, int len)
    }
 }
 
-static bool
-is_utf16_surrogate_first(uint32 c)
-{
-   return (c >= 0xD800 && c <= 0xDBFF);
-}
-
 static void
 escape_variable(bool as_ident)
 {