Also synced the ecpg lexer with the backend lexer.
author Michael Meskes
Wed, 5 Oct 2005 14:58:36 +0000 (14:58 +0000)
committer Michael Meskes
Wed, 5 Oct 2005 14:58:36 +0000 (14:58 +0000)
src/interfaces/ecpg/ChangeLog
src/interfaces/ecpg/preproc/pgc.l

index c0b87a89070a3b23a9522555f9a3dc0fbdf66a76..4ad3725af00a2acd37e9d7f65b71126076eecc89 100644 (file)
@@ -1945,6 +1945,10 @@ Tue Oct  4 15:23:00 CEST 2005
 
    - Synced parser.
    - Fixed another bug in check to report missing varchar pointer implementation.
+
+Wed Oct  5 16:57:42 CEST 2005
+
+   - Synced lexer.
    - Set ecpg library version to 5.1.
    - Set ecpg version to 4.1.1.
 
index c86f2cdf1e1fcfeaefac37da51f2d7cabab2e6f8..f72b7bf7d2a1e9c91f3af5b7e58466f7c80ee164 100644 (file)
@@ -12,7 +12,7 @@
  *
  *
  * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.136 2005/06/16 01:43:48 momjian Exp $
+ *   $PostgreSQL: pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.137 2005/10/05 14:58:36 meskes Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -29,6 +29,8 @@ extern YYSTYPE yylval;
 
 static int     xcdepth = 0;    /* depth of nesting in slash-star comments */
 static char    *dolqstart;      /* current $foo$ quote start string */
+bool                    escape_string_warning;
+static bool             warn_on_first_escape;
 
 /*
  * literalbuf is used to accumulate literal values when multiple rules
@@ -44,6 +46,7 @@ static int        literalalloc;           /* current allocated buffer size */
 static void addlit(char *ytext, int yleng);
 static void addlitchar (unsigned char);
 static void parse_include (void);
+static void check_escape_warning(void);
 
 char *token_start;
 int state_before;
@@ -111,48 +114,44 @@ static struct _if_value
 /* Bit string
  */
 xbstart            [bB]{quote}
-xbstop         {quote}
 xbinside       [^']*
-xbcat          {quote}{whitespace_with_newline}{quote}
 
-/* Hexadecimal number
- */
+/* Hexadecimal number */
 xhstart            [xX]{quote}
-xhstop         {quote}
 xhinside       [^']*
-xhcat          {quote}{whitespace_with_newline}{quote}
 
-/* National character
- */
+/* National character */
 xnstart                        [nN]{quote}
 
-/* C version of hex number
- */
+/* Quoted string that allows backslash escapes */
+xestart                 [eE]{quote}
+
+/* C version of hex number */
 xch            0[xX][0-9A-Fa-f]*
 
 /* Extended quote
- * xqdouble implements embedded quote
- * xqcat allows strings to cross input lines
+ * xqdouble implements embedded quote, ''''
  */
-quote          '
 xqstart            {quote}
-xqstop         {quote}
 xqdouble       {quote}{quote}
 xqinside       [^\\']+
 xqescape       [\\][^0-7]
 xqoctesc       [\\][0-7]{1,3}
 xqhexesc       [\\]x[0-9A-Fa-f]{1,2}
-xqcat          {quote}{whitespace_with_newline}{quote}
 
 /* $foo$ style quotes ("dollar quoting")
  * The quoted string starts with $foo$ where "foo" is an optional string
  * in the form of an identifier, except that it may not contain "$",
  * and extends to the first occurrence of an identical string.
  * There is *no* processing of the quoted text.
+ *
+ * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
+ * fails to match its trailing "$".
  */
 dolq_start             [A-Za-z\200-\377_]
 dolq_cont              [A-Za-z\200-\377_0-9]
 dolqdelim              \$({dolq_start}{dolq_cont}*)?\$
+dolqfailed             \${dolq_start}{dolq_cont}*
 dolqinside             [^$]+
 
 /* Double quote
@@ -218,11 +217,16 @@ operator      {op_chars}+
 /* we no longer allow unary minus in numbers.
  * instead we pass it separately to parser. there it gets
  * coerced via doNegate() -- Leon aug 20 1999
+ *
+ * {realfail1} and {realfail2} are added to prevent the need for scanner
+ * backup when the {real} rule fails to match completely.
  */
 
 integer            {digit}+
 decimal            (({digit}*\.{digit}+)|({digit}+\.{digit}*))
-real           ((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+))
+real                   ({integer}|{decimal})[Ee][-+]?{digit}+
+realfail1              ({integer}|{decimal})[Ee]
+realfail2              ({integer}|{decimal})[Ee][-+]
 
 param          \${integer}
 
@@ -262,6 +266,11 @@ whitespace     ({space}+|{comment})
 horiz_whitespace   ({horiz_space}|{comment})
 whitespace_with_newline ({horiz_whitespace}*{newline}{whitespace}*)
 
+quote          '
+quotestop      {quote}{whitespace}*
+quotecontinue      {quote}{whitespace_with_newline}{quote}
+quotefail      {quote}{whitespace}*"-"
+
 /* special characters for other dbms */
 /* we have to react differently in compat mode */
 informix_special   [\$]
@@ -343,6 +352,7 @@ cppline         {space}*#(.*\\{space})*.*{newline}
 
 {xcinside}     { ECHO; }
 {op_chars}     { ECHO; }
+\*+        { ECHO; }
 
 <>        { mmerror(PARSE_ERROR, ET_FATAL, "Unterminated /* comment"); }
 
@@ -352,7 +362,9 @@ cppline         {space}*#(.*\\{space})*.*{newline}
                        startlit();
                        addlitchar('b');
                    }
-{xbstop}   {
+{quotestop} |
+{quotefail}    {
+                       yyless(1);
                        BEGIN(SQL);
                        if (literalbuf[strspn(literalbuf, "01") + 1] != '\0')
                            mmerror(PARSE_ERROR, ET_ERROR, "invalid bit string input.");
@@ -362,8 +374,8 @@ cppline         {space}*#(.*\\{space})*.*{newline}
 
 {xhinside} |
 {xbinside} { addlit(yytext, yyleng); }
-{xhcat}        |
-{xbcat}        { /* ignore */ }
+{quotecontinue}    |
+{quotecontinue}    { /* ignore */ }
 <>        { mmerror(PARSE_ERROR, ET_FATAL, "Unterminated bit string"); }
 
 {xhstart}     {
@@ -371,44 +383,71 @@ cppline           {space}*#(.*\\{space})*.*{newline}
                        BEGIN(xh);
                        startlit();
                        addlitchar('x');
-                   }
-{xhstop}       {
-                       yylval.str = mm_strdup(literalbuf);
-                       return XCONST;
-                   }
+           }
+{quotestop}    |
+{quotefail}    {
+               yyless(1);
+               BEGIN(SQL);
+               yylval.str = mm_strdup(literalbuf);
+               return XCONST;
+           }
 
 <>        { mmerror(PARSE_ERROR, ET_FATAL, "Unterminated hexadecimal integer"); }
 {xnstart}              {
                /* National character.
-                * Need to remember type info to flow it forward into the parser.
-                        * Not yet implemented. - thomas 2002-06-17
+                        * Transfer it as-is to the backend.
                         */
                    token_start = yytext;
                BEGIN(xq);
                startlit();
            }
 {xqstart}   {
-                       token_start = yytext;
-                       state_before = YYSTATE;
-                       BEGIN(xq);
-                       startlit();
-                   }
-{xqstop}       {
-                       BEGIN(state_before);
-                       yylval.str = mm_strdup(literalbuf);
-                       return SCONST;
-                   }
+               warn_on_first_escape = true;
+               token_start = yytext;
+               state_before = YYSTATE;
+               BEGIN(xq);
+               startlit();
+           }
+{xestart}   {
+               warn_on_first_escape = false;
+               token_start = yytext;
+               state_before = YYSTATE;
+               BEGIN(xq);
+               startlit();
+           }
+{quotestop} |
+{quotefail}        {
+               yyless(1);
+               BEGIN(state_before);
+               yylval.str = mm_strdup(literalbuf);
+               return SCONST;
+           }
 {xqdouble}     { addlitchar('\''); }
 {xqinside}     { addlit(yytext, yyleng); }
-{xqescape}     { addlit(yytext, yyleng); }
-{xqoctesc}     { addlit(yytext, yyleng); }
-{xqhexesc}     { addlit(yytext, yyleng); }
-{xqcat}        { /* ignore */ }
+{xqescape}     { 
+               check_escape_warning();
+               addlit(yytext, yyleng);
+           }
+{xqoctesc}     { 
+               check_escape_warning();
+               addlit(yytext, yyleng);
+           }
+{xqhexesc}     { 
+               check_escape_warning();
+               addlit(yytext, yyleng);
+           }
+{quotecontinue}    { /* ignore */ }
 .                   {
                                        /* This is only needed for \ just before EOF */
                                        addlitchar(yytext[0]);
                         }
 <>        { mmerror(PARSE_ERROR, ET_FATAL, "Unterminated quoted string"); }
+{dolqfailed}  {
+               /* throw back all but the initial "$" */
+               yyless(1);
+               /* and treat it as {other} */
+               return yytext[0];   
+           }
 {dolqdelim}        {
                token_start = yytext;
                dolqstart = mm_strdup(yytext);
@@ -434,9 +473,8 @@ cppline         {space}*#(.*\\{space})*.*{newline}
                        yyless(yyleng-1);
                }
            }
-{dolqinside}    {
-               addlit(yytext, yyleng);
-           }
+{dolqinside}    { addlit(yytext, yyleng); }
+{dolqfailed}    { addlit(yytext, yyleng); }
 .       {
                /* This is only needed for $ inside the quoted text */
                addlitchar(yytext[0]);
@@ -588,11 +626,21 @@ cppline           {space}*#(.*\\{space})*.*{newline}
 {decimal}          {
                        yylval.str = mm_strdup(yytext);
                        return FCONST;
-                   }
+           }
 {real}      {
                        yylval.str = mm_strdup(yytext);
                        return FCONST;
-                   }
+           }
+{realfail1}   {
+               yyless(yyleng-1);
+               yylval.str = mm_strdup(yytext);
+               return FCONST;
+           }
+{realfail2}   {
+               yyless(yyleng-2);
+               yylval.str = mm_strdup(yytext);
+               return FCONST;
+           }
 :{identifier}((("->"|\.){identifier})|(\[{array}\]))* {
                        yylval.str = mm_strdup(yytext+1);
                        return(CVARIABLE);
@@ -1189,3 +1237,11 @@ parse_include(void)
 
    BEGIN C;
 }
+
+static void        /* warn about a backslash escape, at most once per call sequence */
+check_escape_warning(void)
+{
+   if (warn_on_first_escape && escape_string_warning)  /* both flags must be set to warn */
+       mmerror (PARSE_ERROR, ET_WARNING, "nonstandard use of escape in a string literal");
+   warn_on_first_escape = false;   /* NOT part of the if: cleared on every call, so we warn only once per string */
+}