Improve lexer's error reporting. You get the whole token mentioned now
author Tom Lane
Wed, 1 May 2002 17:12:08 +0000 (17:12 +0000)
committer Tom Lane
Wed, 1 May 2002 17:12:08 +0000 (17:12 +0000)
in parse error messages, not just the part scanned by the last flex rule.
For example,
select "foo" "bar";
used to draw
ERROR:  parser: parse error at or near """
which was rather unhelpful.  Now it gives
ERROR:  parser: parse error at or near ""bar""
Also, error messages concerning bitstring literals and suchlike will
quote the source text at you, not the processed internal form of the literal.

src/backend/parser/scan.l
src/backend/po/nls.mk
src/test/regress/expected/strings.out

index cb8610c87ac384a3e0730eb67e33283ff4af7365..f59cd7b27b439a48a4884cb150abe7f3e5bd40d2 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.92 2002/04/20 21:56:14 petere Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.93 2002/05/01 17:12:07 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -56,6 +56,17 @@ static void addlit(char *ytext, int yleng);
 static void addlitchar(unsigned char ychar);
 static char *litbufdup(void);
 
+/*
+ * When we parse a token that requires multiple lexer rules to process,
+ * we set token_start to point at the true start of the token, for use
+ * by yyerror().  yytext will point at just the text consumed by the last
+ * rule, so it's not very helpful (eg, it might contain just the last
+ * quote mark of a quoted identifier).  But to avoid cluttering every rule
+ * with setting token_start, we allow token_start = NULL to denote that
+ * it's okay to use yytext.
+ */
+static char       *token_start;
+
 /* Handles to the buffer that the lexer uses internally */
 static YY_BUFFER_STATE scanbufhandle;
 static char *scanbuf;
@@ -208,7 +219,7 @@ non_newline     [^\n\r]
 
 comment            ("--"{non_newline}*)
 
-whitespace     ({space}|{comment})
+whitespace     ({space}+|{comment})
 
 /*
  * SQL92 requires at least one newline in the whitespace separating
@@ -235,9 +246,16 @@ other          .
  */
 
 %%
+
+%{
+                   /* code to execute during start of each call of yylex() */
+                   token_start = NULL;
+%}
+
 {whitespace}   { /* ignore */ }
 
 {xcstart}      {
+                   token_start = yytext;
                    xcdepth = 0;
                    BEGIN(xc);
                    /* Put back any characters past slash-star; see above */
@@ -252,7 +270,11 @@ other          .
 
 <xc>{xcstop}   {
                    if (xcdepth <= 0)
+                   {
                        BEGIN(INITIAL);
+                       /* reset token_start for next token */
+                       token_start = NULL;
+                   }
                    else
                        xcdepth--;
                }
@@ -261,9 +283,10 @@ other          .
 
 <xc>{op_chars} { /* ignore */ }
 
-<xc><<EOF>>        { elog(ERROR, "Unterminated /* comment"); }
+<xc><<EOF>>        { yyerror("unterminated /* comment"); }
 
 {xbitstart}        {
+                   token_start = yytext;
                    BEGIN(xbit);
                    startlit();
                    addlitchar('b');
@@ -271,8 +294,7 @@ other           .
 <xbit>{xbitstop}   {
                    BEGIN(INITIAL);
                    if (literalbuf[strspn(literalbuf + 1, "01") + 1] != '\0')
-                       elog(ERROR, "invalid bit string input: '%s'",
-                            literalbuf);
+                       yyerror("invalid bit string input");
                    yylval.str = litbufdup();
                    return BITCONST;
                }
@@ -284,9 +306,10 @@ other          .
 <xbit>{xbitcat}        {
                    /* ignore */
                }
-<xbit><<EOF>>      { elog(ERROR, "unterminated bit string literal"); }
+<xbit><<EOF>>      { yyerror("unterminated bit string literal"); }
 
 {xhstart}      {
+                   token_start = yytext;
                    BEGIN(xh);
                    startlit();
                }
@@ -303,14 +326,14 @@ other         .
                        || val != (long) ((int32) val)
 #endif
                        )
-                       elog(ERROR, "Bad hexadecimal integer input '%s'",
-                            literalbuf);
+                       yyerror("bad hexadecimal integer input");
                    yylval.ival = val;
                    return ICONST;
                }
-<xh><<EOF>>        { elog(ERROR, "Unterminated hexadecimal integer"); }
+<xh><<EOF>>        { yyerror("unterminated hexadecimal integer"); }
 
 {xqstart}      {
+                   token_start = yytext;
                    BEGIN(xq);
                    startlit();
                }
@@ -335,30 +358,31 @@ other         .
 <xq>{xqcat}        {
                    /* ignore */
                }
-<xq><<EOF>>        { elog(ERROR, "Unterminated quoted string"); }
+<xq><<EOF>>        { yyerror("unterminated quoted string"); }
 
 
 {xdstart}      {
+                   token_start = yytext;
                    BEGIN(xd);
                    startlit();
                }
 <xd>{xdstop}   {
                    BEGIN(INITIAL);
-                   if (strlen(literalbuf) == 0)
-                       elog(ERROR, "zero-length delimited identifier");
-                   if (strlen(literalbuf) >= NAMEDATALEN)
+                   if (literallen == 0)
+                       yyerror("zero-length delimited identifier");
+                   if (literallen >= NAMEDATALEN)
                    {
-#ifdef MULTIBYTE
                        int len;
-                       len = pg_mbcliplen(literalbuf,strlen(literalbuf),NAMEDATALEN-1);
-                       elog(WARNING, "identifier \"%s\" will be truncated to \"%.*s\"",
-                            literalbuf, len, literalbuf);
-                       literalbuf[len] = '\0';
+#ifdef MULTIBYTE
+                       len = pg_mbcliplen(literalbuf, literallen,
+                                          NAMEDATALEN-1);
 #else
-                       elog(WARNING, "identifier \"%s\" will be truncated to \"%.*s\"",
-                            literalbuf, NAMEDATALEN-1, literalbuf);
-                       literalbuf[NAMEDATALEN-1] = '\0';
+                       len = NAMEDATALEN-1;
 #endif
+                       elog(NOTICE, "identifier \"%s\" will be truncated to \"%.*s\"",
+                            literalbuf, len, literalbuf);
+                       literalbuf[len] = '\0';
+                       literallen = len;
                    }
                    yylval.str = litbufdup();
                    return IDENT;
@@ -369,7 +393,7 @@ other           .
 <xd>{xdinside} {
                    addlit(yytext, yyleng);
                }
-<xd><<EOF>>        { elog(ERROR, "Unterminated quoted identifier"); }
+<xd><<EOF>>        { yyerror("unterminated quoted identifier"); }
 
 {typecast}     { return TYPECAST; }
 
@@ -383,8 +407,8 @@ other           .
                     * character will match a prior rule, not this one.
                     */
                    int     nchars = yyleng;
-                   char   *slashstar = strstr((char*)yytext, "/*");
-                   char   *dashdash = strstr((char*)yytext, "--");
+                   char   *slashstar = strstr(yytext, "/*");
+                   char   *dashdash = strstr(yytext, "--");
 
                    if (slashstar && dashdash)
                    {
@@ -395,7 +419,7 @@ other           .
                    else if (!slashstar)
                        slashstar = dashdash;
                    if (slashstar)
-                       nchars = slashstar - ((char*)yytext);
+                       nchars = slashstar - yytext;
 
                    /*
                     * For SQL92 compatibility, '+' and '-' cannot be the
@@ -437,15 +461,15 @@ other         .
                    }
 
                    /* Convert "!=" operator to "<>" for compatibility */
-                   if (strcmp((char*)yytext, "!=") == 0)
+                   if (strcmp(yytext, "!=") == 0)
                        yylval.str = pstrdup("<>");
                    else
-                       yylval.str = pstrdup((char*)yytext);
+                       yylval.str = pstrdup(yytext);
                    return Op;
                }
 
 {param}            {
-                   yylval.ival = atol((char*)&yytext[1]);
+                   yylval.ival = atol(yytext + 1);
                    return PARAM;
                }
 
@@ -454,7 +478,7 @@ other           .
                    char* endptr;
 
                    errno = 0;
-                   val = strtol((char *)yytext, &endptr, 10);
+                   val = strtol(yytext, &endptr, 10);
                    if (*endptr != '\0' || errno == ERANGE
 #ifdef HAVE_LONG_INT_64
                        /* if long > 32 bits, check for overflow of int4 */
@@ -463,28 +487,29 @@ other         .
                        )
                    {
                        /* integer too large, treat it as a float */
-                       yylval.str = pstrdup((char*)yytext);
+                       yylval.str = pstrdup(yytext);
                        return FCONST;
                    }
                    yylval.ival = val;
                    return ICONST;
                }
 {decimal}      {
-                   yylval.str = pstrdup((char*)yytext);
+                   yylval.str = pstrdup(yytext);
                    return FCONST;
                }
 {real}         {
-                   yylval.str = pstrdup((char*)yytext);
+                   yylval.str = pstrdup(yytext);
                    return FCONST;
                }
 
 
 {identifier}   {
                    ScanKeyword    *keyword;
+                   char           *ident;
                    int             i;
 
                    /* Is it a keyword? */
-                   keyword = ScanKeywordLookup((char*) yytext);
+                   keyword = ScanKeywordLookup(yytext);
                    if (keyword != NULL)
                        return keyword->value;
 
@@ -496,26 +521,25 @@ other         .
                     * which seems appropriate under SQL99 rules, whereas
                     * the keyword comparison was NOT locale-dependent.
                     */
-                   for (i = 0; yytext[i]; i++)
+                   ident = pstrdup(yytext);
+                   for (i = 0; ident[i]; i++)
                    {
-                       if (isupper((unsigned char) yytext[i]))
-                           yytext[i] = tolower((unsigned char) yytext[i]);
+                       if (isupper((unsigned char) ident[i]))
+                           ident[i] = tolower((unsigned char) ident[i]);
                    }
                    if (i >= NAMEDATALEN)
                     {
-#ifdef MULTIBYTE
                        int len;
-                       len = pg_mbcliplen(yytext,i,NAMEDATALEN-1);
-                        elog(WARNING, "identifier \"%s\" will be truncated to \"%.*s\"",
-                             yytext, len, yytext);
-                       yytext[len] = '\0';
+#ifdef MULTIBYTE
+                       len = pg_mbcliplen(ident, i, NAMEDATALEN-1);
 #else
-                        elog(WARNING, "identifier \"%s\" will be truncated to \"%.*s\"",
-                             yytext, NAMEDATALEN-1, yytext);
-                       yytext[NAMEDATALEN-1] = '\0';
+                       len = NAMEDATALEN-1;
 #endif
+                        elog(NOTICE, "identifier \"%s\" will be truncated to \"%.*s\"",
+                             ident, len, ident);
+                       ident[len] = '\0';
                     }
-                   yylval.str = pstrdup((char*) yytext);
+                   yylval.str = ident;
                    return IDENT;
                }
 
@@ -526,7 +550,8 @@ other           .
 void
 yyerror(const char *message)
 {
-   elog(ERROR, "parser: %s at or near \"%s\"", message, yytext);
+   elog(ERROR, "parser: %s at or near \"%s\"", message,
+        token_start ? token_start : yytext);
 }
 
 
index 75975029b3b390c97fe56f3782c5455a22a31120..8797d8527d8b26cd5073bcb23afa2ca40691b9bc 100644 (file)
@@ -1,4 +1,4 @@
 CATALOG_NAME   := postgres
 AVAIL_LANGUAGES    := cs de hu ru zh_CN zh_TW
 GETTEXT_FILES  := + gettext-files
-GETTEXT_TRIGGERS:= elog:2 postmaster_error
+GETTEXT_TRIGGERS:= elog:2 postmaster_error yyerror
index 42df7c06df2e698b8357878b0053684be81b6ef8..ebfe8eeb66328d0014fc1981638dba5698a84a34 100644 (file)
@@ -17,7 +17,7 @@ SELECT 'first line'
 ' - next line' /* this comment is not allowed here */
 ' - third line'
    AS "Illegal comment within continuation";
-ERROR:  parser: parse error at or near "'"
+ERROR:  parser: parse error at or near "' - third line'"
 --
 -- test conversions between various string types
 --