Improve parser's one-extra-token lookahead mechanism.

author Tom Lane

Tue, 24 Feb 2015 22:53:42 +0000 (17:53 -0500)

committer Tom Lane

Tue, 24 Feb 2015 22:53:45 +0000 (17:53 -0500)
author Tom Lane
Tue, 24 Feb 2015 22:53:42 +0000 (17:53 -0500)
committer Tom Lane
Tue, 24 Feb 2015 22:53:45 +0000 (17:53 -0500)
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y

index 6c21002875394b9b8f04800cc81b994ebafbe705..581f7a1c1c64fa20687a372740770345fef35992 100644 (file)
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -633,9 +633,9 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
  /*
   * The grammar thinks these are keywords, but they are not in the kwlist.h
   * list and so can never be entered directly.  The filter in parser.c
- * creates these tokens when required.
+ * creates these tokens when required (based on looking one token ahead).
   */
-%token         NULLS_FIRST NULLS_LAST WITH_ORDINALITY WITH_TIME
+%token         NULLS_LA WITH_LA
  
  
  /* Precedence: lowest to highest */
@@ -873,6 +873,7 @@ CreateRoleStmt:
  
  
  opt_with:  WITH                                    {}
+           | WITH_LA                               {}
             | /*EMPTY*/                             {}
         ;
  
@@ -6673,8 +6674,8 @@ opt_asc_desc: ASC                         { $$ = SORTBY_ASC; }
             | /*EMPTY*/                     { $$ = SORTBY_DEFAULT; }
         ;
  
-opt_nulls_order: NULLS_FIRST               { $$ = SORTBY_NULLS_FIRST; }
-           | NULLS_LAST                    { $$ = SORTBY_NULLS_LAST; }
+opt_nulls_order: NULLS_LA FIRST_P          { $$ = SORTBY_NULLS_FIRST; }
+           | NULLS_LA LAST_P               { $$ = SORTBY_NULLS_LAST; }
             | /*EMPTY*/                     { $$ = SORTBY_NULLS_DEFAULT; }
         ;
  
@@ -8923,7 +8924,7 @@ AlterTSDictionaryStmt:
         ;
  
  AlterTSConfigurationStmt:
-           ALTER TEXT_P SEARCH CONFIGURATION any_name ADD_P MAPPING FOR name_list WITH any_name_list
+           ALTER TEXT_P SEARCH CONFIGURATION any_name ADD_P MAPPING FOR name_list any_with any_name_list
                 {
                     AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
                     n->cfgname = $5;
@@ -8933,7 +8934,7 @@ AlterTSConfigurationStmt:
                     n->replace = false;
                     $$ = (Node*)n;
                 }
-           | ALTER TEXT_P SEARCH CONFIGURATION any_name ALTER MAPPING FOR name_list WITH any_name_list
+           | ALTER TEXT_P SEARCH CONFIGURATION any_name ALTER MAPPING FOR name_list any_with any_name_list
                 {
                     AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
                     n->cfgname = $5;
@@ -8943,7 +8944,7 @@ AlterTSConfigurationStmt:
                     n->replace = false;
                     $$ = (Node*)n;
                 }
-           | ALTER TEXT_P SEARCH CONFIGURATION any_name ALTER MAPPING REPLACE any_name WITH any_name
+           | ALTER TEXT_P SEARCH CONFIGURATION any_name ALTER MAPPING REPLACE any_name any_with any_name
                 {
                     AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
                     n->cfgname = $5;
@@ -8953,7 +8954,7 @@ AlterTSConfigurationStmt:
                     n->replace = true;
                     $$ = (Node*)n;
                 }
-           | ALTER TEXT_P SEARCH CONFIGURATION any_name ALTER MAPPING FOR name_list REPLACE any_name WITH any_name
+           | ALTER TEXT_P SEARCH CONFIGURATION any_name ALTER MAPPING FOR name_list REPLACE any_name any_with any_name
                 {
                     AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt);
                     n->cfgname = $5;
@@ -8981,6 +8982,11 @@ AlterTSConfigurationStmt:
                 }
         ;
  
+/* Use this if TIME or ORDINALITY after WITH should be taken as an identifier */
+any_with:  WITH                                    {}
+           | WITH_LA                               {}
+       ;
+
  
  /*****************************************************************************
   *
@@ -9891,6 +9897,8 @@ simple_select:
   *     AS (query) [ SEARCH or CYCLE clause ]
   *
   * We don't currently support the SEARCH or CYCLE clause.
+ *
+ * Recognizing WITH_LA here allows a CTE to be named TIME or ORDINALITY.
   */
  with_clause:
         WITH cte_list
@@ -9900,6 +9908,13 @@ with_clause:
                 $$->recursive = false;
                 $$->location = @1;
             }
+       | WITH_LA cte_list
+           {
+               $$ = makeNode(WithClause);
+               $$->ctes = $2;
+               $$->recursive = false;
+               $$->location = @1;
+           }
         | WITH RECURSIVE cte_list
             {
                 $$ = makeNode(WithClause);
@@ -10601,7 +10616,7 @@ opt_col_def_list: AS '(' TableFuncElementList ')'   { $$ = $3; }
             | /*EMPTY*/                             { $$ = NIL; }
         ;
  
-opt_ordinality: WITH_ORDINALITY                        { $$ = true; }
+opt_ordinality: WITH_LA ORDINALITY                 { $$ = true; }
             | /*EMPTY*/                             { $$ = false; }
         ;
  
@@ -11057,7 +11072,7 @@ ConstInterval:
         ;
  
  opt_timezone:
-           WITH_TIME ZONE                          { $$ = TRUE; }
+           WITH_LA TIME ZONE                       { $$ = TRUE; }
             | WITHOUT TIME ZONE                     { $$ = FALSE; }
             | /*EMPTY*/                             { $$ = FALSE; }
         ;
diff --git a/src/backend/parser/parser.c b/src/backend/parser/parser.c

index db49275e00a476743ac399d96c72f265ab546c1f..b17771d4cca2ee532e6b519dab7ba92324b9e6ea 100644 (file)
--- a/src/backend/parser/parser.c
+++ b/src/backend/parser/parser.c
@@ -64,13 +64,13 @@ raw_parser(const char *str)
  /*
   * Intermediate filter between parser and core lexer (core_yylex in scan.l).
   *
- * The filter is needed because in some cases the standard SQL grammar
+ * This filter is needed because in some cases the standard SQL grammar
   * requires more than one token lookahead.  We reduce these cases to one-token
- * lookahead by combining tokens here, in order to keep the grammar LALR(1).
+ * lookahead by replacing tokens here, in order to keep the grammar LALR(1).
   *
   * Using a filter is simpler than trying to recognize multiword tokens
   * directly in scan.l, because we'd have to allow for comments between the
- * words.  Furthermore it's not clear how to do it without re-introducing
+ * words.  Furthermore it's not clear how to do that without re-introducing
   * scanner backtrack, which would cost more performance than this filter
   * layer does.
   *
@@ -84,7 +84,7 @@ base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
     base_yy_extra_type *yyextra = pg_yyget_extra(yyscanner);
     int         cur_token;
     int         next_token;
-   core_YYSTYPE cur_yylval;
+   int         cur_token_length;
     YYLTYPE     cur_yylloc;
  
     /* Get next token --- we might already have it */
@@ -93,74 +93,85 @@ base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
         cur_token = yyextra->lookahead_token;
         lvalp->core_yystype = yyextra->lookahead_yylval;
         *llocp = yyextra->lookahead_yylloc;
+       *(yyextra->lookahead_end) = yyextra->lookahead_hold_char;
         yyextra->have_lookahead = false;
     }
     else
         cur_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);
  
-   /* Do we need to look ahead for a possible multiword token? */
+   /*
+    * If this token isn't one that requires lookahead, just return it.  If it
+    * does, determine the token length.  (We could get that via strlen(), but
+    * since we have such a small set of possibilities, hardwiring seems
+    * feasible and more efficient.)
+    */
     switch (cur_token)
     {
         case NULLS_P:
+           cur_token_length = 5;
+           break;
+       case WITH:
+           cur_token_length = 4;
+           break;
+       default:
+           return cur_token;
+   }
  
-           /*
-            * NULLS FIRST and NULLS LAST must be reduced to one token
-            */
-           cur_yylval = lvalp->core_yystype;
-           cur_yylloc = *llocp;
-           next_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);
+   /*
+    * Identify end+1 of current token.  core_yylex() has temporarily stored a
+    * '\0' here, and will undo that when we call it again.  We need to redo
+    * it to fully revert the lookahead call for error reporting purposes.
+    */
+   yyextra->lookahead_end = yyextra->core_yy_extra.scanbuf +
+       *llocp + cur_token_length;
+   Assert(*(yyextra->lookahead_end) == '\0');
+
+   /*
+    * Save and restore *llocp around the call.  It might look like we could
+    * avoid this by just passing &lookahead_yylloc to core_yylex(), but that
+    * does not work because flex actually holds onto the last-passed pointer
+    * internally, and will use that for error reporting.  We need any error
+    * reports to point to the current token, not the next one.
+    */
+   cur_yylloc = *llocp;
+
+   /* Get next token, saving outputs into lookahead variables */
+   next_token = core_yylex(&(yyextra->lookahead_yylval), llocp, yyscanner);
+   yyextra->lookahead_token = next_token;
+   yyextra->lookahead_yylloc = *llocp;
+
+   *llocp = cur_yylloc;
+
+   /* Now revert the un-truncation of the current token */
+   yyextra->lookahead_hold_char = *(yyextra->lookahead_end);
+   *(yyextra->lookahead_end) = '\0';
+
+   yyextra->have_lookahead = true;
+
+   /* Replace cur_token if needed, based on lookahead */
+   switch (cur_token)
+   {
+       case NULLS_P:
+           /* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
             switch (next_token)
             {
                 case FIRST_P:
-                   cur_token = NULLS_FIRST;
-                   break;
                 case LAST_P:
-                   cur_token = NULLS_LAST;
-                   break;
-               default:
-                   /* save the lookahead token for next time */
-                   yyextra->lookahead_token = next_token;
-                   yyextra->lookahead_yylval = lvalp->core_yystype;
-                   yyextra->lookahead_yylloc = *llocp;
-                   yyextra->have_lookahead = true;
-                   /* and back up the output info to cur_token */
-                   lvalp->core_yystype = cur_yylval;
-                   *llocp = cur_yylloc;
+                   cur_token = NULLS_LA;
                     break;
             }
             break;
  
         case WITH:
-
-           /*
-            * WITH TIME and WITH ORDINALITY must each be reduced to one token
-            */
-           cur_yylval = lvalp->core_yystype;
-           cur_yylloc = *llocp;
-           next_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);
+           /* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */
             switch (next_token)
             {
                 case TIME:
-                   cur_token = WITH_TIME;
-                   break;
                 case ORDINALITY:
-                   cur_token = WITH_ORDINALITY;
-                   break;
-               default:
-                   /* save the lookahead token for next time */
-                   yyextra->lookahead_token = next_token;
-                   yyextra->lookahead_yylval = lvalp->core_yystype;
-                   yyextra->lookahead_yylloc = *llocp;
-                   yyextra->have_lookahead = true;
-                   /* and back up the output info to cur_token */
-                   lvalp->core_yystype = cur_yylval;
-                   *llocp = cur_yylloc;
+                   cur_token = WITH_LA;
                     break;
             }
             break;
-
-       default:
-           break;
     }
  
     return cur_token;
diff --git a/src/include/parser/gramparse.h b/src/include/parser/gramparse.h

index d9df30374b2d9367efe1c2e9ba13f92a10e263f0..100fdfb213ef442c8676a2dc90267e129372f8b3 100644 (file)
--- a/src/include/parser/gramparse.h
+++ b/src/include/parser/gramparse.h
@@ -46,6 +46,8 @@ typedef struct base_yy_extra_type
     int         lookahead_token;    /* one-token lookahead */
     core_YYSTYPE lookahead_yylval;      /* yylval for lookahead token */
     YYLTYPE     lookahead_yylloc;       /* yylloc for lookahead token */
+   char       *lookahead_end;  /* end of current token */
+   char        lookahead_hold_char;    /* to be put back at *lookahead_end */
  
     /*
      * State variables that belong to the grammar.
diff --git a/src/interfaces/ecpg/preproc/parse.pl b/src/interfaces/ecpg/preproc/parse.pl

index f998310ef4644648ae292905d1ef0959ce679714..36dce80386343ce6a8d8d697eb124261e783db95 100644 (file)
--- a/src/interfaces/ecpg/preproc/parse.pl
+++ b/src/interfaces/ecpg/preproc/parse.pl
@@ -42,10 +42,8 @@ my %replace_token = (
  
  # or in the block
  my %replace_string = (
-   'WITH_TIME'       => 'with time',
-   'WITH_ORDINALITY' => 'with ordinality',
-   'NULLS_FIRST'     => 'nulls first',
-   'NULLS_LAST'      => 'nulls last',
+   'NULLS_LA'        => 'nulls',
+   'WITH_LA'         => 'with',
     'TYPECAST'        => '::',
     'DOT_DOT'         => '..',
     'COLON_EQUALS'    => ':=',);
diff --git a/src/interfaces/ecpg/preproc/parser.c b/src/interfaces/ecpg/preproc/parser.c

index f1188269533280f18399a6acae441f52f215e7f3..099a213d11872f01796d1f83621996c2dc2f4ea6 100644 (file)
--- a/src/interfaces/ecpg/preproc/parser.c
+++ b/src/interfaces/ecpg/preproc/parser.c
@@ -3,11 +3,8 @@
   * parser.c
   *     Main entry point/driver for PostgreSQL grammar
   *
- * Note that the grammar is not allowed to perform any table access
- * (since we need to be able to do basic parsing even while inside an
- * aborted transaction).  Therefore, the data structures returned by
- * the grammar are "raw" parsetrees that still need to be analyzed by
- * analyze.c and related files.
+ * This should match src/backend/parser/parser.c, except that we do not
+ * need to bother with re-entrant interfaces.
   *
   *
   * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
@@ -29,18 +26,21 @@ static bool have_lookahead;     /* is lookahead info valid? */
  static int lookahead_token;    /* one-token lookahead */
  static YYSTYPE lookahead_yylval;   /* yylval for lookahead token */
  static YYLTYPE lookahead_yylloc;   /* yylloc for lookahead token */
+static char *lookahead_yytext; /* start current token */
+static char *lookahead_end;        /* end of current token */
+static char lookahead_hold_char;   /* to be put back at *lookahead_end */
  
  
  /*
   * Intermediate filter between parser and base lexer (base_yylex in scan.l).
   *
- * The filter is needed because in some cases the standard SQL grammar
+ * This filter is needed because in some cases the standard SQL grammar
   * requires more than one token lookahead.  We reduce these cases to one-token
- * lookahead by combining tokens here, in order to keep the grammar LALR(1).
+ * lookahead by replacing tokens here, in order to keep the grammar LALR(1).
   *
   * Using a filter is simpler than trying to recognize multiword tokens
   * directly in scan.l, because we'd have to allow for comments between the
- * words.  Furthermore it's not clear how to do it without re-introducing
+ * words.  Furthermore it's not clear how to do that without re-introducing
   * scanner backtrack, which would cost more performance than this filter
   * layer does.
   */
@@ -49,8 +49,10 @@ filtered_base_yylex(void)
  {
     int         cur_token;
     int         next_token;
+   int         cur_token_length;
     YYSTYPE     cur_yylval;
     YYLTYPE     cur_yylloc;
+   char       *cur_yytext;
  
     /* Get next token --- we might already have it */
     if (have_lookahead)
@@ -58,74 +60,86 @@ filtered_base_yylex(void)
         cur_token = lookahead_token;
         base_yylval = lookahead_yylval;
         base_yylloc = lookahead_yylloc;
+       yytext = lookahead_yytext;
+       *lookahead_end = lookahead_hold_char;
         have_lookahead = false;
     }
     else
         cur_token = base_yylex();
  
-   /* Do we need to look ahead for a possible multiword token? */
+   /*
+    * If this token isn't one that requires lookahead, just return it.  If it
+    * does, determine the token length.  (We could get that via strlen(), but
+    * since we have such a small set of possibilities, hardwiring seems
+    * feasible and more efficient.)
+    */
     switch (cur_token)
     {
         case NULLS_P:
+           cur_token_length = 5;
+           break;
+       case WITH:
+           cur_token_length = 4;
+           break;
+       default:
+           return cur_token;
+   }
+
+   /*
+    * Identify end+1 of current token.  base_yylex() has temporarily stored a
+    * '\0' here, and will undo that when we call it again.  We need to redo
+    * it to fully revert the lookahead call for error reporting purposes.
+    */
+   lookahead_end = yytext + cur_token_length;
+   Assert(*lookahead_end == '\0');
+
+   /* Save and restore lexer output variables around the call */
+   cur_yylval = base_yylval;
+   cur_yylloc = base_yylloc;
+   cur_yytext = yytext;
+
+   /* Get next token, saving outputs into lookahead variables */
+   next_token = base_yylex();
+
+   lookahead_token = next_token;
+   lookahead_yylval = base_yylval;
+   lookahead_yylloc = base_yylloc;
+   lookahead_yytext = yytext;
+
+   base_yylval = cur_yylval;
+   base_yylloc = cur_yylloc;
+   yytext = cur_yytext;
+
+   /* Now revert the un-truncation of the current token */
+   lookahead_hold_char = *lookahead_end;
+   *lookahead_end = '\0';
+
+   have_lookahead = true;
  
-           /*
-            * NULLS FIRST and NULLS LAST must be reduced to one token
-            */
-           cur_yylval = base_yylval;
-           cur_yylloc = base_yylloc;
-           next_token = base_yylex();
+   /* Replace cur_token if needed, based on lookahead */
+   switch (cur_token)
+   {
+       case NULLS_P:
+           /* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
             switch (next_token)
             {
                 case FIRST_P:
-                   cur_token = NULLS_FIRST;
-                   break;
                 case LAST_P:
-                   cur_token = NULLS_LAST;
-                   break;
-               default:
-                   /* save the lookahead token for next time */
-                   lookahead_token = next_token;
-                   lookahead_yylval = base_yylval;
-                   lookahead_yylloc = base_yylloc;
-                   have_lookahead = true;
-                   /* and back up the output info to cur_token */
-                   base_yylval = cur_yylval;
-                   base_yylloc = cur_yylloc;
+                   cur_token = NULLS_LA;
                     break;
             }
             break;
  
         case WITH:
-
-           /*
-            * WITH TIME must be reduced to one token
-            */
-           cur_yylval = base_yylval;
-           cur_yylloc = base_yylloc;
-           next_token = base_yylex();
+           /* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */
             switch (next_token)
             {
                 case TIME:
-                   cur_token = WITH_TIME;
-                   break;
                 case ORDINALITY:
-                   cur_token = WITH_ORDINALITY;
-                   break;
-               default:
-                   /* save the lookahead token for next time */
-                   lookahead_token = next_token;
-                   lookahead_yylval = base_yylval;
-                   lookahead_yylloc = base_yylloc;
-                   have_lookahead = true;
-                   /* and back up the output info to cur_token */
-                   base_yylval = cur_yylval;
-                   base_yylloc = cur_yylloc;
+                   cur_token = WITH_LA;
                     break;
             }
             break;
-
-       default:
-           break;
     }
  
     return cur_token;
diff --git a/src/test/regress/expected/foreign_data.out b/src/test/regress/expected/foreign_data.out

index 4795c83c9b695330ae7202c75a624540db0391ff..632b7e5bffc880ff4f339f826d9d53d934bf1e85 100644 (file)
--- a/src/test/regress/expected/foreign_data.out
+++ b/src/test/regress/expected/foreign_data.out
@@ -663,7 +663,7 @@ LINE 1: CREATE FOREIGN TABLE ft1 ();
  CREATE FOREIGN TABLE ft1 () SERVER no_server;                   -- ERROR
  ERROR:  server "no_server" does not exist
  CREATE FOREIGN TABLE ft1 () SERVER s0 WITH OIDS;                -- ERROR
-ERROR:  syntax error at or near "WITH OIDS"
+ERROR:  syntax error at or near "WITH"
  LINE 1: CREATE FOREIGN TABLE ft1 () SERVER s0 WITH OIDS;
                                                ^
  CREATE FOREIGN TABLE ft1 (
diff --git a/src/test/regress/expected/with.out b/src/test/regress/expected/with.out

index 06b372bf519d0346fafe3c1f4a0ede0d48034aa2..524e0ef2c68d4cc5d2d1406bb1fc5ffd8c900a4f 100644 (file)
--- a/src/test/regress/expected/with.out
+++ b/src/test/regress/expected/with.out
@@ -2155,3 +2155,18 @@ WITH t AS (
  VALUES(FALSE);
  ERROR:  conditional DO INSTEAD rules are not supported for data-modifying statements in WITH
  DROP RULE y_rule ON y;
+-- check that parser lookahead for WITH doesn't cause any odd behavior
+create table foo (with baz);  -- fail, WITH is a reserved word
+ERROR:  syntax error at or near "with"
+LINE 1: create table foo (with baz);
+                          ^
+create table foo (with ordinality);  -- fail, WITH is a reserved word
+ERROR:  syntax error at or near "with"
+LINE 1: create table foo (with ordinality);
+                          ^
+with ordinality as (select 1 as x) select * from ordinality;
+ x 
+---
+ 1
+(1 row)
+
diff --git a/src/test/regress/sql/with.sql b/src/test/regress/sql/with.sql

index c7163699576225662f4fd2cae3401fc40294b1ce..1687c1198305fae371591d57044b73c9104d5382 100644 (file)
--- a/src/test/regress/sql/with.sql
+++ b/src/test/regress/sql/with.sql
@@ -956,3 +956,8 @@ WITH t AS (
  )
  VALUES(FALSE);
  DROP RULE y_rule ON y;
+
+-- check that parser lookahead for WITH doesn't cause any odd behavior
+create table foo (with baz);  -- fail, WITH is a reserved word
+create table foo (with ordinality);  -- fail, WITH is a reserved word
+with ordinality as (select 1 as x) select * from ordinality;
author	Tom Lane
	Tue, 24 Feb 2015 22:53:42 +0000 (17:53 -0500)
committer	Tom Lane
	Tue, 24 Feb 2015 22:53:45 +0000 (17:53 -0500)
src/backend/parser/gram.y		patch \| blob \| blame \| history
src/backend/parser/parser.c		patch \| blob \| blame \| history
src/include/parser/gramparse.h		patch \| blob \| blame \| history
src/interfaces/ecpg/preproc/parse.pl		patch \| blob \| blame \| history
src/interfaces/ecpg/preproc/parser.c		patch \| blob \| blame \| history
src/test/regress/expected/foreign_data.out		patch \| blob \| blame \| history
src/test/regress/expected/with.out		patch \| blob \| blame \| history
src/test/regress/sql/with.sql		patch \| blob \| blame \| history