Improve efficiency of attribute scanning in CopyReadAttributesCSV.
author Andrew Dunstan
Sat, 8 Mar 2008 01:16:26 +0000 (01:16 +0000)
committer Andrew Dunstan
Sat, 8 Mar 2008 01:16:26 +0000 (01:16 +0000)
The loop is split into two parts, one for scanning inside quotes and one for scanning outside quotes, saving some instructions in both parts.

Heikki Linnakangas

src/backend/commands/copy.c

index 2fb2a312710e02e58ae9350558a93a419d0a2b30..194f3b380a1e0ef05db8af1a5528f059c03bab0f 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.295 2008/01/01 19:45:48 momjian Exp $
+ *   $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.296 2008/03/08 01:16:26 adunstan Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2913,7 +2913,6 @@ CopyReadAttributesCSV(CopyState cstate, int maxfields, char **fieldvals)
    for (;;)
    {
        bool        found_delim = false;
-       bool        in_quote = false;
        bool        saw_quote = false;
        char       *start_ptr;
        char       *end_ptr;
@@ -2929,72 +2928,87 @@ CopyReadAttributesCSV(CopyState cstate, int maxfields, char **fieldvals)
        start_ptr = cur_ptr;
        fieldvals[fieldno] = output_ptr;
 
-       /* Scan data for field */
+       /* Scan data for field.
+        *
+        * The loop starts in "not quote" mode and then toggles between
+        * that and "in quote" mode.
+        * The loop exits normally if it is in "not quote" mode and a
+        * delimiter or line end is seen.
+        */
        for (;;)
        {
            char        c;
 
-           end_ptr = cur_ptr;
-           if (cur_ptr >= line_end_ptr)
-               break;
-           c = *cur_ptr++;
-           /* unquoted field delimiter */
-           if (c == delimc && !in_quote)
-           {
-               found_delim = true;
-               break;
-           }
-           /* start of quoted field (or part of field) */
-           if (c == quotec && !in_quote)
+           /* Not in quote */
+           for (;;)
            {
-               saw_quote = true;
-               in_quote = true;
-               continue;
+               end_ptr = cur_ptr;
+               if (cur_ptr >= line_end_ptr)
+                   goto endfield;
+               c = *cur_ptr++;
+               /* unquoted field delimiter */
+               if (c == delimc)
+               {
+                   found_delim = true;
+                   goto endfield;
+               }
+               /* start of quoted field (or part of field) */
+               if (c == quotec)
+               {
+                   saw_quote = true;
+                   break;
+               }
+               /* Add c to output string */
+               *output_ptr++ = c;
            }
-           /* escape within a quoted field */
-           if (c == escapec && in_quote)
+
+           /* In quote */
+           for (;;)
            {
-               /*
-                * peek at the next char if available, and escape it if it is
-                * an escape char or a quote char
-                */
-               if (cur_ptr < line_end_ptr)
-               {
-                   char        nextc = *cur_ptr;
+               end_ptr = cur_ptr;
+               if (cur_ptr >= line_end_ptr)
+                   ereport(ERROR,
+                           (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+                            errmsg("unterminated CSV quoted field")));
+
+               c = *cur_ptr++;
 
-                   if (nextc == escapec || nextc == quotec)
+               /* escape within a quoted field */
+               if (c == escapec)
+               {
+                   /*
+                    * peek at the next char if available, and escape it if it is
+                    * an escape char or a quote char
+                    */
+                   if (cur_ptr < line_end_ptr)
                    {
-                       *output_ptr++ = nextc;
-                       cur_ptr++;
-                       continue;
+                       char        nextc = *cur_ptr;
+
+                       if (nextc == escapec || nextc == quotec)
+                       {
+                           *output_ptr++ = nextc;
+                           cur_ptr++;
+                           continue;
+                       }
                    }
                }
-           }
+               /*
+                * end of quoted field. Must do this test after testing for escape
+                * in case quote char and escape char are the same (which is the
+                * common case).
+                */
+               if (c == quotec)
+                   break;
 
-           /*
-            * end of quoted field. Must do this test after testing for escape
-            * in case quote char and escape char are the same (which is the
-            * common case).
-            */
-           if (c == quotec && in_quote)
-           {
-               in_quote = false;
-               continue;
+               /* Add c to output string */
+               *output_ptr++ = c;
            }
-
-           /* Add c to output string */
-           *output_ptr++ = c;
        }
+   endfield:
 
        /* Terminate attribute value in output area */
        *output_ptr++ = '\0';
 
-       /* Shouldn't still be in quote mode */
-       if (in_quote)
-           ereport(ERROR,
-                   (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
-                    errmsg("unterminated CSV quoted field")));
-
        /* Check whether raw input matched null marker */
        input_len = end_ptr - start_ptr;
        if (!saw_quote && input_len == cstate->null_print_len &&