Fix array_out's failure to backslash backslashes, per bug# 524. Also,
author Tom Lane
Thu, 29 Nov 2001 21:02:41 +0000 (21:02 +0000)
committer Tom Lane
Thu, 29 Nov 2001 21:02:41 +0000 (21:02 +0000)
remove brain-dead rule that double quotes are needed if and only if the
datatype is pass-by-reference; neither direction of the implication holds
water.  Instead, examine the actual data string to see if it contains
any characters that force us to quote it.
Add some documentation about quoting of array values, which was previously
explained nowhere AFAICT.

doc/src/sgml/array.sgml
src/backend/utils/adt/arrayfuncs.c
src/test/regress/expected/arrays.out

index 99cfde85f2c4486321689962827ac3b9d707404a..1696d61c257ec7c595bc19949f0352d395a9eef4 100644 (file)
@@ -1,4 +1,4 @@
-
+
 
 
  Arrays
@@ -248,4 +248,36 @@ SELECT * FROM sal_emp WHERE pay_by_quarter **= 10000;
   
  
 
+  Quoting array elements.
+  
+   As shown above, when writing an array literal value you may write double
+   quotes around any individual array
+   element.  You must do so if the element value would otherwise
+   confuse the array-value parser.  For example, elements containing curly
+   braces, commas, double quotes, backslashes, or white space must be
+   double-quoted.  To put a double quote or backslash in an array element
+   value, precede it with a backslash.
+  
+
+  
+   Remember that what you write in an SQL query will first be interpreted
+   as a string literal, and then as an array.  This doubles the number of
+   backslashes you need.  For example, to insert a text array
+   value containing a backslash and a double quote, you'd need to write
+
+INSERT ... VALUES ('{"\\\\","\\""}');
+
+   The string-literal processor removes one level of backslashes, so that
+   what arrives at the array-value parser looks like {"\\","\""}.
+   In turn, the strings fed to the text datatype's input routine
+   become \ and " respectively.  (If we were working
+   with a datatype whose input routine also treated backslashes specially,
+   bytea for example, we might need as many as eight backslashes
+   in the query to get one backslash into the stored array element.)
+  
+
 
index f66878471448e2dbefe98b62450bfb8a839e3bde..0cfa371c998338a8fabbe2d78036c36d8f8ffbb6 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/adt/arrayfuncs.c,v 1.71 2001/10/25 05:49:43 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/adt/arrayfuncs.c,v 1.72 2001/11/29 21:02:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -346,6 +346,7 @@ ArrayCount(char *str, int *dim, int typdelim)
  *  If element type is pass-by-ref, the Datums point to palloc'd values.
  *  *nbytes is set to the amount of data space needed for the array,
  *  including alignment padding but not including array header overhead.
+ *  CAUTION: the contents of "arrayStr" may be modified!
  *---------------------------------------------------------------------------
  */
 static Datum *
@@ -564,16 +565,13 @@ array_out(PG_FUNCTION_ARGS)
    char       *p,
               *tmp,
               *retval,
-             **values,
-               delim[2];
+             **values;
+   bool       *needquotes;
    int         nitems,
                overall_length,
                i,
                j,
                k,
-#ifndef TCL_ARRAYS
-               l,
-#endif
                indx[MAXDIM];
    int         ndim,
               *dim;
@@ -581,26 +579,29 @@ array_out(PG_FUNCTION_ARGS)
    system_cache_lookup(element_type, false, &typlen, &typbyval,
                        &typdelim, &typelem, &typoutput, &typalign);
    fmgr_info(typoutput, &outputproc);
-   sprintf(delim, "%c", typdelim);
    ndim = ARR_NDIM(v);
    dim = ARR_DIMS(v);
    nitems = ArrayGetNItems(ndim, dim);
 
    if (nitems == 0)
    {
-       retval = (char *) palloc(3);
-       retval[0] = '{';
-       retval[1] = '}';
-       retval[2] = '\0';
+       retval = pstrdup("{}");
        PG_RETURN_CSTRING(retval);
    }
 
+   /*
+    * Convert all values to string form, count total space needed
+    * (including any overhead such as escaping backslashes),
+    * and detect whether each item needs double quotes.
+    */
+   values = (char **) palloc(nitems * sizeof(char *));
+   needquotes = (bool *) palloc(nitems * sizeof(bool));
    p = ARR_DATA_PTR(v);
    overall_length = 1;         /* [TRH] don't forget to count \0 at end. */
-   values = (char **) palloc(nitems * sizeof(char *));
    for (i = 0; i < nitems; i++)
    {
        Datum       itemvalue;
+       bool        nq;
 
        itemvalue = fetch_att(p, typbyval, typlen);
        values[i] = DatumGetCString(FunctionCall3(&outputproc,
@@ -612,20 +613,32 @@ array_out(PG_FUNCTION_ARGS)
        else
            p += INTALIGN(*(int32 *) p);
 
-       /*
-        * For the pair of double quotes
-        */
-       if (!typbyval)
-           overall_length += 2;
-
+       /* count data plus backslashes; detect chars needing quotes */
+       nq = (values[i][0] == '\0');    /* force quotes for empty string */
        for (tmp = values[i]; *tmp; tmp++)
        {
+           char    ch = *tmp;
+
            overall_length += 1;
+           if (ch == '"' || ch == '\\')
+           {
+               nq = true;
 #ifndef TCL_ARRAYS
-           if (*tmp == '"')
                overall_length += 1;
 #endif
+           }
+           else if (ch == '{' || ch == '}' || ch == typdelim ||
+                    isspace((unsigned char) ch))
+               nq = true;
        }
+
+       needquotes[i] = nq;
+
+       /* Count the pair of double quotes, if needed */
+       if (nq)
+           overall_length += 2;
+
+       /* and the comma */
        overall_length += 1;
    }
 
@@ -634,41 +647,41 @@ array_out(PG_FUNCTION_ARGS)
     */
    for (i = j = 0, k = 1; i < ndim; k *= dim[i++], j += k);
 
-   p = (char *) palloc(overall_length + 2 * j);
-   retval = p;
+   retval = (char *) palloc(overall_length + 2 * j);
+   p = retval;
+
+#define APPENDSTR(str) (strcpy(p, (str)), p += strlen(p))
+#define APPENDCHAR(ch) (*p++ = (ch), *p = '\0')
 
-   strcpy(p, "{");
+   APPENDCHAR('{');
    for (i = 0; i < ndim; indx[i++] = 0);
    j = 0;
    k = 0;
    do
    {
        for (i = j; i < ndim - 1; i++)
-           strcat(p, "{");
+           APPENDCHAR('{');
 
-       /*
-        * Surround anything that is not passed by value in double quotes.
-        * See above for more details.
-        */
-       if (!typbyval)
+       if (needquotes[k])
        {
-           strcat(p, "\"");
+           APPENDCHAR('"');
 #ifndef TCL_ARRAYS
-           l = strlen(p);
            for (tmp = values[k]; *tmp; tmp++)
            {
-               if (*tmp == '"')
-                   p[l++] = '\\';
-               p[l++] = *tmp;
+               char    ch = *tmp;
+
+               if (ch == '"' || ch == '\\')
+                   *p++ = '\\';
+               *p++ = ch;
            }
-           p[l] = '\0';
+           *p = '\0';
 #else
-           strcat(p, values[k]);
+           APPENDSTR(values[k]);
 #endif
-           strcat(p, "\"");
+           APPENDCHAR('"');
        }
        else
-           strcat(p, values[k]);
+           APPENDSTR(values[k]);
        pfree(values[k++]);
 
        for (i = ndim - 1; i >= 0; i--)
@@ -676,16 +689,21 @@ array_out(PG_FUNCTION_ARGS)
            indx[i] = (indx[i] + 1) % dim[i];
            if (indx[i])
            {
-               strcat(p, delim);
+               APPENDCHAR(typdelim);
                break;
            }
            else
-               strcat(p, "}");
+               APPENDCHAR('}');
        }
        j = i;
    } while (j != -1);
 
+#undef APPENDSTR
+#undef APPENDCHAR
+
    pfree(values);
+   pfree(needquotes);
+
    PG_RETURN_CSTRING(retval);
 }
 
index 74c1008d907582c812530250cb9b8a46104cc467..f729fe1ed432c1d69da27e3950222b1b0b2150e3 100644 (file)
@@ -28,11 +28,11 @@ INSERT INTO arrtest (a, b[2][2][1], c, d, e, f, g)
 INSERT INTO arrtest (a, b[1][2][2], c, d[2][1])
    VALUES ('{}', '{3,4}', '{foo,bar}', '{bar,foo}');
 SELECT * FROM arrtest;
-      a      |        b        |       c       |         d         |       e       |         f         |        g        
--------------+-----------------+---------------+-------------------+---------------+-------------------+-----------------
- {1,2,3,4,5} | {{{0,0},{1,2}}} | {}            | {}                |               | {}                | {}
- {11,12,23}  | {{3,4},{4,5}}   | {"foobar"}    | {{"elt1","elt2"}} | {"3.4","6.7"} | {"abc  ","abcde"} | {"abc","abcde"}
- {}          | {3,4}           | {"foo","bar"} | {"bar","foo"}     |               |                   | 
+      a      |        b        |     c     |       d       |     e     |        f        |      g      
+-------------+-----------------+-----------+---------------+-----------+-----------------+-------------
+ {1,2,3,4,5} | {{{0,0},{1,2}}} | {}        | {}            |           | {}              | {}
+ {11,12,23}  | {{3,4},{4,5}}   | {foobar}  | {{elt1,elt2}} | {3.4,6.7} | {"abc  ",abcde} | {abc,abcde}
+ {}          | {3,4}           | {foo,bar} | {bar,foo}     |           |                 | 
 (3 rows)
 
 SELECT arrtest.a[1],
@@ -62,11 +62,11 @@ SELECT a[1:3],
           c[1:2], 
           d[1:1][1:2]
    FROM arrtest;
-     a      |        b        |       c       |         d         
-------------+-----------------+---------------+-------------------
- {1,2,3}    | {{{0,0},{1,2}}} |               
- {11,12,23} |                 | {"foobar"}    | {{"elt1","elt2"}}
-            |                 | {"foo","bar"} | 
+     a      |        b        |     c     |       d       
+------------+-----------------+-----------+---------------
+ {1,2,3}    | {{{0,0},{1,2}}} |           | 
+ {11,12,23} |                 | {foobar}  | {{elt1,elt2}}
+            |                 | {foo,bar} | 
 (3 rows)
 
 SELECT array_dims(a) AS a,array_dims(b) AS b,array_dims(c) AS c
@@ -98,11 +98,11 @@ UPDATE arrtest
   SET c[2:2] = '{"new_word"}'
   WHERE array_dims(c) is not null;
 SELECT a,b,c FROM arrtest;
-       a       |           b           |           c           
----------------+-----------------------+-----------------------
+       a       |           b           |         c         
+---------------+-----------------------+-------------------
  {16,25,3,4,5} | {{{113,142},{1,147}}} | {}
- {}            | {3,4}                 | {"foo","new_word"}
- {16,25,23}    | {{3,4},{4,5}}         | {"foobar","new_word"}
+ {}            | {3,4}                 | {foo,new_word}
+ {16,25,23}    | {{3,4},{4,5}}         | {foobar,new_word}
 (3 rows)
 
 SELECT a[1:3],
@@ -110,10 +110,10 @@ SELECT a[1:3],
           c[1:2], 
           d[1:1][2:2]
    FROM arrtest;
-     a      |           b           |           c           |     d      
-------------+-----------------------+-----------------------+------------
- {16,25,3}  | {{{113,142},{1,147}}} |                       
-            |                       | {"foo","new_word"}    | 
- {16,25,23} |                       | {"foobar","new_word"} | {{"elt2"}}
+     a      |           b           |         c         |    d     
+------------+-----------------------+-------------------+----------
+ {16,25,3}  | {{{113,142},{1,147}}} |                   | 
+            |                       | {foo,new_word}    | 
+ {16,25,23} |                       | {foobar,new_word} | {{elt2}}
 (3 rows)