Fix jsonb Unicode escape processing, and in consequence disallow \u0000.

author Tom Lane
Fri, 30 Jan 2015 19:44:46 +0000 (14:44 -0500)

committer Tom Lane
Fri, 30 Jan 2015 19:44:56 +0000 (14:44 -0500)

diff --git a/doc/src/sgml/json.sgml b/doc/src/sgml/json.sgml

index 8feb2fbf0ad251833380c8da7fd62c876ef330e8..6282ab885397683428197d526bbb226093184c9e 100644 (file)
--- a/doc/src/sgml/json.sgml
+++ b/doc/src/sgml/json.sgml
@@ -69,12 +69,14 @@
    regardless of the database encoding, and are checked only for syntactic
    correctness (that is, that four hex digits follow \u).
    However, the input function for jsonb is stricter: it disallows
-  Unicode escapes for non-ASCII characters (those
-  above U+007F) unless the database encoding is UTF8.  It also
-  insists that any use of Unicode surrogate pairs to designate characters
-  outside the Unicode Basic Multilingual Plane be correct.  Valid Unicode
-  escapes, except for \u0000, are then converted to the
-  equivalent ASCII or UTF8 character for storage.
+  Unicode escapes for non-ASCII characters (those above U+007F)
+  unless the database encoding is UTF8.  The jsonb type also
+  rejects \u0000 (because that cannot be represented in
+  PostgreSQL's text type), and it insists
+  that any use of Unicode surrogate pairs to designate characters outside
+  the Unicode Basic Multilingual Plane be correct.  Valid Unicode escapes
+  are converted to the equivalent ASCII or UTF8 character for storage;
+  this includes folding surrogate pairs into a single character.
   
  
   
@@ -101,7 +103,7 @@
    constitutes valid jsonb data that do not apply to
    the json type, nor to JSON in the abstract, corresponding
    to limits on what can be represented by the underlying data type.
-  Specifically, jsonb will reject numbers that are outside the
+  Notably, jsonb will reject numbers that are outside the
    range of the PostgreSQL numeric data
    type, while json will not.  Such implementation-defined
    restrictions are permitted by RFC 7159.  However, in
@@ -134,7 +136,8 @@
         
          string
          text
-        See notes above concerning encoding restrictions
+        \u0000 is disallowed, as are non-ASCII Unicode
+         escapes if database encoding is not UTF8
         
         
          number
diff --git a/doc/src/sgml/release-9.4.sgml b/doc/src/sgml/release-9.4.sgml

index 961e4617978e965ab065ee368c28e0efc23f15f4..11bbf3bf36ce6b28c777bfda293a8496864400e4 100644 (file)
--- a/doc/src/sgml/release-9.4.sgml
+++ b/doc/src/sgml/release-9.4.sgml
@@ -101,22 +101,6 @@
       
      
  
-    
-     
-      Unicode escapes in JSON
-      text values are no longer rendered with the backslash escaped
-      (Andrew Dunstan)
-     
-
-     
-      Previously, all backslashes in text values being formed into JSON
-      were escaped. Now a backslash followed by u and four
-      hexadecimal digits is not escaped, as this is a legal sequence in a
-      JSON string value, and escaping the backslash led to some perverse
-      results.
-     
-    
-
      
       
        When converting values of type date, timestamp
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c

index 3c137ead1d07cc526ef428c1aeacefbbc6913d11..951b6554007b2272d917e1e431c084130a7ed6d7 100644 (file)
--- a/src/backend/utils/adt/json.c
+++ b/src/backend/utils/adt/json.c
@@ -806,14 +806,17 @@ json_lex_string(JsonLexContext *lex)
                      * For UTF8, replace the escape sequence by the actual
                      * utf8 character in lex->strval. Do this also for other
                      * encodings if the escape designates an ASCII character,
-                    * otherwise raise an error. We don't ever unescape a
-                    * \u0000, since that would result in an impermissible nul
-                    * byte.
+                    * otherwise raise an error.
                      */
  
                     if (ch == 0)
                     {
-                       appendStringInfoString(lex->strval, "\\u0000");
+                       /* We can't allow this, since our TEXT type doesn't */
+                       ereport(ERROR,
+                               (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
+                              errmsg("unsupported Unicode escape sequence"),
+                          errdetail("\\u0000 cannot be converted to text."),
+                                report_json_context(lex)));
                     }
                     else if (GetDatabaseEncoding() == PG_UTF8)
                     {
@@ -833,8 +836,8 @@ json_lex_string(JsonLexContext *lex)
                     else
                     {
                         ereport(ERROR,
-                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                errmsg("invalid input syntax for type json"),
+                               (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
+                              errmsg("unsupported Unicode escape sequence"),
                                  errdetail("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8."),
                                  report_json_context(lex)));
                     }
@@ -1284,8 +1287,8 @@ json_categorize_type(Oid typoid,
  
     /*
      * We need to get the output function for everything except date and
-    * timestamp types, array and composite types, booleans,
-    * and non-builtin types  where there's a cast to json.
+    * timestamp types, array and composite types, booleans, and non-builtin
+    * types where there's a cast to json.
      */
  
     switch (typoid)
@@ -1335,11 +1338,12 @@ json_categorize_type(Oid typoid,
                 /* but let's look for a cast to json, if it's not built-in */
                 if (typoid >= FirstNormalObjectId)
                 {
-                   Oid castfunc;
+                   Oid         castfunc;
                     CoercionPathType ctype;
  
                     ctype = find_coercion_pathway(JSONOID, typoid,
-                                                 COERCION_EXPLICIT, &castfunc);
+                                                 COERCION_EXPLICIT,
+                                                 &castfunc);
                     if (ctype == COERCION_PATH_FUNC && OidIsValid(castfunc))
                     {
                         *tcategory = JSONTYPE_CAST;
@@ -2382,30 +2386,7 @@ escape_json(StringInfo buf, const char *str)
                 appendStringInfoString(buf, "\\\"");
                 break;
             case '\\':
-
-               /*
-                * Unicode escapes are passed through as is. There is no
-                * requirement that they denote a valid character in the
-                * server encoding - indeed that is a big part of their
-                * usefulness.
-                *
-                * All we require is that they consist of \uXXXX where the Xs
-                * are hexadecimal digits. It is the responsibility of the
-                * caller of, say, to_json() to make sure that the unicode
-                * escape is valid.
-                *
-                * In the case of a jsonb string value being escaped, the only
-                * unicode escape that should be present is \u0000, all the
-                * other unicode escapes will have been resolved.
-                */
-               if (p[1] == 'u' &&
-                   isxdigit((unsigned char) p[2]) &&
-                   isxdigit((unsigned char) p[3]) &&
-                   isxdigit((unsigned char) p[4]) &&
-                   isxdigit((unsigned char) p[5]))
-                   appendStringInfoCharMacro(buf, *p);
-               else
-                   appendStringInfoString(buf, "\\\\");
+               appendStringInfoString(buf, "\\\\");
                 break;
             default:
                 if ((unsigned char) *p < ' ')
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out

index e435d3e16502b23e236007058a983aab2f200272..16704363dc62b9ccfedab1b124f574821f5c936b 100644 (file)
--- a/src/test/regress/expected/json.out
+++ b/src/test/regress/expected/json.out
@@ -426,20 +426,6 @@ select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
  (1 row)
  
  COMMIT;
--- unicode escape - backslash is not escaped
-select to_json(text '\uabcd');
- to_json  
-----------
- "\uabcd"
-(1 row)
-
--- any other backslash is escaped
-select to_json(text '\abcd');
- to_json  
-----------
- "\\abcd"
-(1 row)
-
  --json_agg
  SELECT json_agg(q)
    FROM ( SELECT $$a$$ || x AS b, y AS c,
@@ -1400,6 +1386,36 @@ ERROR:  invalid input syntax for type json
  DETAIL:  Unicode low surrogate must follow a high surrogate.
  CONTEXT:  JSON data, line 1: { "a":...
  --handling of simple unicode escapes
+select json '{ "a":  "the Copyright \u00a9 sign" }' as correct_in_utf8;
+            correct_in_utf8            
+---------------------------------------
+ { "a":  "the Copyright \u00a9 sign" }
+(1 row)
+
+select json '{ "a":  "dollar \u0024 character" }' as correct_everywhere;
+         correct_everywhere          
+-------------------------------------
+ { "a":  "dollar \u0024 character" }
+(1 row)
+
+select json '{ "a":  "dollar \\u0024 character" }' as not_an_escape;
+            not_an_escape             
+--------------------------------------
+ { "a":  "dollar \\u0024 character" }
+(1 row)
+
+select json '{ "a":  "null \u0000 escape" }' as not_unescaped;
+         not_unescaped          
+--------------------------------
+ { "a":  "null \u0000 escape" }
+(1 row)
+
+select json '{ "a":  "null \\u0000 escape" }' as not_an_escape;
+          not_an_escape          
+---------------------------------
+ { "a":  "null \\u0000 escape" }
+(1 row)
+
  select json '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
     correct_in_utf8    
  ----------------------
@@ -1412,8 +1428,18 @@ select json '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
   dollar $ character
  (1 row)
  
-select json '{ "a":  "null \u0000 escape" }' ->> 'a' as not_unescaped;
-   not_unescaped    
+select json '{ "a":  "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+      not_an_escape      
+-------------------------
+ dollar \u0024 character
+(1 row)
+
+select json '{ "a":  "null \u0000 escape" }' ->> 'a' as fails;
+ERROR:  unsupported Unicode escape sequence
+DETAIL:  \u0000 cannot be converted to text.
+CONTEXT:  JSON data, line 1: { "a":...
+select json '{ "a":  "null \\u0000 escape" }' ->> 'a' as not_an_escape;
+   not_an_escape    
  --------------------
   null \u0000 escape
  (1 row)
diff --git a/src/test/regress/expected/json_1.out b/src/test/regress/expected/json_1.out

index 106b481fab91bf82a6b9c339875e2c2884d42272..807814641dd897f6e47c3b5eb8141c21f3269714 100644 (file)
--- a/src/test/regress/expected/json_1.out
+++ b/src/test/regress/expected/json_1.out
@@ -426,20 +426,6 @@ select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
  (1 row)
  
  COMMIT;
--- unicode escape - backslash is not escaped
-select to_json(text '\uabcd');
- to_json  
-----------
- "\uabcd"
-(1 row)
-
--- any other backslash is escaped
-select to_json(text '\abcd');
- to_json  
-----------
- "\\abcd"
-(1 row)
-
  --json_agg
  SELECT json_agg(q)
    FROM ( SELECT $$a$$ || x AS b, y AS c,
@@ -1378,7 +1364,7 @@ select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":[100,200,3
  
  -- handling of unicode surrogate pairs
  select json '{ "a":  "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8;
-ERROR:  invalid input syntax for type json
+ERROR:  unsupported Unicode escape sequence
  DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
  CONTEXT:  JSON data, line 1: { "a":...
  select json '{ "a":  "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
@@ -1398,8 +1384,38 @@ ERROR:  invalid input syntax for type json
  DETAIL:  Unicode low surrogate must follow a high surrogate.
  CONTEXT:  JSON data, line 1: { "a":...
  --handling of simple unicode escapes
+select json '{ "a":  "the Copyright \u00a9 sign" }' as correct_in_utf8;
+            correct_in_utf8            
+---------------------------------------
+ { "a":  "the Copyright \u00a9 sign" }
+(1 row)
+
+select json '{ "a":  "dollar \u0024 character" }' as correct_everywhere;
+         correct_everywhere          
+-------------------------------------
+ { "a":  "dollar \u0024 character" }
+(1 row)
+
+select json '{ "a":  "dollar \\u0024 character" }' as not_an_escape;
+            not_an_escape             
+--------------------------------------
+ { "a":  "dollar \\u0024 character" }
+(1 row)
+
+select json '{ "a":  "null \u0000 escape" }' as not_unescaped;
+         not_unescaped          
+--------------------------------
+ { "a":  "null \u0000 escape" }
+(1 row)
+
+select json '{ "a":  "null \\u0000 escape" }' as not_an_escape;
+          not_an_escape          
+---------------------------------
+ { "a":  "null \\u0000 escape" }
+(1 row)
+
  select json '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
-ERROR:  invalid input syntax for type json
+ERROR:  unsupported Unicode escape sequence
  DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
  CONTEXT:  JSON data, line 1: { "a":...
  select json '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
@@ -1408,8 +1424,18 @@ select json '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
   dollar $ character
  (1 row)
  
-select json '{ "a":  "null \u0000 escape" }' ->> 'a' as not_unescaped;
-   not_unescaped    
+select json '{ "a":  "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+      not_an_escape      
+-------------------------
+ dollar \u0024 character
+(1 row)
+
+select json '{ "a":  "null \u0000 escape" }' ->> 'a' as fails;
+ERROR:  unsupported Unicode escape sequence
+DETAIL:  \u0000 cannot be converted to text.
+CONTEXT:  JSON data, line 1: { "a":...
+select json '{ "a":  "null \\u0000 escape" }' ->> 'a' as not_an_escape;
+   not_an_escape    
  --------------------
   null \u0000 escape
  (1 row)
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out

index aa5686ffb69dbeda3a355f13cfcd1ed1fd296265..6c6ed950f0830c8323d48134618e57ed0c0fc9de 100644 (file)
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -60,12 +60,18 @@ LINE 1: SELECT '"\u000g"'::jsonb;
                 ^
  DETAIL:  "\u" must be followed by four hexadecimal digits.
  CONTEXT:  JSON data, line 1: "\u000g...
-SELECT '"\u0000"'::jsonb;      -- OK, legal escape
-  jsonb   
-----------
- "\u0000"
+SELECT '"\u0045"'::jsonb;      -- OK, legal escape
+ jsonb 
+-------
+ "E"
  (1 row)
  
+SELECT '"\u0000"'::jsonb;      -- ERROR, we don't support U+0000
+ERROR:  unsupported Unicode escape sequence
+LINE 1: SELECT '"\u0000"'::jsonb;
+               ^
+DETAIL:  \u0000 cannot be converted to text.
+CONTEXT:  JSON data, line 1: ...
  -- use octet_length here so we don't get an odd unicode char in the
  -- output
  SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
@@ -324,20 +330,6 @@ select to_jsonb(timestamptz '2014-05-28 12:22:35.614298-04');
  (1 row)
  
  COMMIT;
--- unicode escape - backslash is not escaped
-select to_jsonb(text '\uabcd');
- to_jsonb 
-----------
- "\uabcd"
-(1 row)
-
--- any other backslash is escaped
-select to_jsonb(text '\abcd');
- to_jsonb 
-----------
- "\\abcd"
-(1 row)
-
  --jsonb_agg
  CREATE TEMP TABLE rows AS
  SELECT x, 'txt' || x as y
@@ -1971,20 +1963,62 @@ LINE 1: SELECT jsonb '{ "a":  "\ude04X" }' -> 'a';
  DETAIL:  Unicode low surrogate must follow a high surrogate.
  CONTEXT:  JSON data, line 1: { "a":...
  -- handling of simple unicode escapes
-SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' AS correct_in_utf8;
+SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' as correct_in_utf8;
+        correct_in_utf8        
+-------------------------------
+ {"a": "the Copyright © sign"}
+(1 row)
+
+SELECT jsonb '{ "a":  "dollar \u0024 character" }' as correct_everywhere;
+     correct_everywhere      
+-----------------------------
+ {"a": "dollar $ character"}
+(1 row)
+
+SELECT jsonb '{ "a":  "dollar \\u0024 character" }' as not_an_escape;
+           not_an_escape           
+-----------------------------------
+ {"a": "dollar \\u0024 character"}
+(1 row)
+
+SELECT jsonb '{ "a":  "null \u0000 escape" }' as fails;
+ERROR:  unsupported Unicode escape sequence
+LINE 1: SELECT jsonb '{ "a":  "null \u0000 escape" }' as fails;
+                     ^
+DETAIL:  \u0000 cannot be converted to text.
+CONTEXT:  JSON data, line 1: { "a":...
+SELECT jsonb '{ "a":  "null \\u0000 escape" }' as not_an_escape;
+        not_an_escape         
+------------------------------
+ {"a": "null \\u0000 escape"}
+(1 row)
+
+SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
     correct_in_utf8    
  ----------------------
   the Copyright © sign
  (1 row)
  
-SELECT jsonb '{ "a":  "dollar \u0024 character" }' ->> 'a' AS correct_everyWHERE;
+SELECT jsonb '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
   correct_everywhere 
  --------------------
   dollar $ character
  (1 row)
  
-SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' AS not_unescaped;
-   not_unescaped    
+SELECT jsonb '{ "a":  "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+      not_an_escape      
+-------------------------
+ dollar \u0024 character
+(1 row)
+
+SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' as fails;
+ERROR:  unsupported Unicode escape sequence
+LINE 1: SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' as fai...
+                     ^
+DETAIL:  \u0000 cannot be converted to text.
+CONTEXT:  JSON data, line 1: { "a":...
+SELECT jsonb '{ "a":  "null \\u0000 escape" }' ->> 'a' as not_an_escape;
+   not_an_escape    
  --------------------
   null \u0000 escape
  (1 row)
diff --git a/src/test/regress/expected/jsonb_1.out b/src/test/regress/expected/jsonb_1.out

index 687ae63b7072ad2fe69b5fd912e868a12577f96f..f30148d51c1bdc232266ca5a6998237b67f39de0 100644 (file)
--- a/src/test/regress/expected/jsonb_1.out
+++ b/src/test/regress/expected/jsonb_1.out
@@ -60,16 +60,22 @@ LINE 1: SELECT '"\u000g"'::jsonb;
                 ^
  DETAIL:  "\u" must be followed by four hexadecimal digits.
  CONTEXT:  JSON data, line 1: "\u000g...
-SELECT '"\u0000"'::jsonb;      -- OK, legal escape
-  jsonb   
-----------
- "\u0000"
+SELECT '"\u0045"'::jsonb;      -- OK, legal escape
+ jsonb 
+-------
+ "E"
  (1 row)
  
+SELECT '"\u0000"'::jsonb;      -- ERROR, we don't support U+0000
+ERROR:  unsupported Unicode escape sequence
+LINE 1: SELECT '"\u0000"'::jsonb;
+               ^
+DETAIL:  \u0000 cannot be converted to text.
+CONTEXT:  JSON data, line 1: ...
  -- use octet_length here so we don't get an odd unicode char in the
  -- output
  SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
-ERROR:  invalid input syntax for type json
+ERROR:  unsupported Unicode escape sequence
  LINE 1: SELECT octet_length('"\uaBcD"'::jsonb::text);
                              ^
  DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
@@ -324,20 +330,6 @@ select to_jsonb(timestamptz '2014-05-28 12:22:35.614298-04');
  (1 row)
  
  COMMIT;
--- unicode escape - backslash is not escaped
-select to_jsonb(text '\uabcd');
- to_jsonb 
-----------
- "\uabcd"
-(1 row)
-
--- any other backslash is escaped
-select to_jsonb(text '\abcd');
- to_jsonb 
-----------
- "\\abcd"
-(1 row)
-
  --jsonb_agg
  CREATE TEMP TABLE rows AS
  SELECT x, 'txt' || x as y
@@ -1941,7 +1933,7 @@ SELECT * FROM jsonb_populate_recordset(row('def',99,NULL)::jbpop,'[{"a":[100,200
  
  -- handling of unicode surrogate pairs
  SELECT octet_length((jsonb '{ "a":  "\ud83d\ude04\ud83d\udc36" }' -> 'a')::text) AS correct_in_utf8;
-ERROR:  invalid input syntax for type json
+ERROR:  unsupported Unicode escape sequence
  LINE 1: SELECT octet_length((jsonb '{ "a":  "\ud83d\ude04\ud83d\udc3...
                                     ^
  DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
@@ -1971,20 +1963,62 @@ LINE 1: SELECT jsonb '{ "a":  "\ude04X" }' -> 'a';
  DETAIL:  Unicode low surrogate must follow a high surrogate.
  CONTEXT:  JSON data, line 1: { "a":...
  -- handling of simple unicode escapes
-SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' AS correct_in_utf8;
-ERROR:  invalid input syntax for type json
+SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' as correct_in_utf8;
+ERROR:  unsupported Unicode escape sequence
+LINE 1: SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' as corr...
+                     ^
+DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
+CONTEXT:  JSON data, line 1: { "a":...
+SELECT jsonb '{ "a":  "dollar \u0024 character" }' as correct_everywhere;
+     correct_everywhere      
+-----------------------------
+ {"a": "dollar $ character"}
+(1 row)
+
+SELECT jsonb '{ "a":  "dollar \\u0024 character" }' as not_an_escape;
+           not_an_escape           
+-----------------------------------
+ {"a": "dollar \\u0024 character"}
+(1 row)
+
+SELECT jsonb '{ "a":  "null \u0000 escape" }' as fails;
+ERROR:  unsupported Unicode escape sequence
+LINE 1: SELECT jsonb '{ "a":  "null \u0000 escape" }' as fails;
+                     ^
+DETAIL:  \u0000 cannot be converted to text.
+CONTEXT:  JSON data, line 1: { "a":...
+SELECT jsonb '{ "a":  "null \\u0000 escape" }' as not_an_escape;
+        not_an_escape         
+------------------------------
+ {"a": "null \\u0000 escape"}
+(1 row)
+
+SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
+ERROR:  unsupported Unicode escape sequence
  LINE 1: SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a'...
                       ^
  DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
  CONTEXT:  JSON data, line 1: { "a":...
-SELECT jsonb '{ "a":  "dollar \u0024 character" }' ->> 'a' AS correct_everyWHERE;
+SELECT jsonb '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
   correct_everywhere 
  --------------------
   dollar $ character
  (1 row)
  
-SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' AS not_unescaped;
-   not_unescaped    
+SELECT jsonb '{ "a":  "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+      not_an_escape      
+-------------------------
+ dollar \u0024 character
+(1 row)
+
+SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' as fails;
+ERROR:  unsupported Unicode escape sequence
+LINE 1: SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' as fai...
+                     ^
+DETAIL:  \u0000 cannot be converted to text.
+CONTEXT:  JSON data, line 1: { "a":...
+SELECT jsonb '{ "a":  "null \\u0000 escape" }' ->> 'a' as not_an_escape;
+   not_an_escape    
  --------------------
   null \u0000 escape
  (1 row)
diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql

index 36a6674ff91092e0dc525edfdff516102eaa5c9e..53a37a88439171127c220470f5319dcdc172239d 100644 (file)
--- a/src/test/regress/sql/json.sql
+++ b/src/test/regress/sql/json.sql
@@ -111,14 +111,6 @@ SET LOCAL TIME ZONE -8;
  select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
  COMMIT;
  
--- unicode escape - backslash is not escaped
-
-select to_json(text '\uabcd');
-
--- any other backslash is escaped
-
-select to_json(text '\abcd');
-
  --json_agg
  
  SELECT json_agg(q)
@@ -401,9 +393,17 @@ select json '{ "a":  "\ude04X" }' -> 'a'; -- orphan low surrogate
  
  --handling of simple unicode escapes
  
+select json '{ "a":  "the Copyright \u00a9 sign" }' as correct_in_utf8;
+select json '{ "a":  "dollar \u0024 character" }' as correct_everywhere;
+select json '{ "a":  "dollar \\u0024 character" }' as not_an_escape;
+select json '{ "a":  "null \u0000 escape" }' as not_unescaped;
+select json '{ "a":  "null \\u0000 escape" }' as not_an_escape;
+
  select json '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
  select json '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
-select json '{ "a":  "null \u0000 escape" }' ->> 'a' as not_unescaped;
+select json '{ "a":  "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+select json '{ "a":  "null \u0000 escape" }' ->> 'a' as fails;
+select json '{ "a":  "null \\u0000 escape" }' ->> 'a' as not_an_escape;
  
  --json_typeof() function
  select value, json_typeof(value)
diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql

index a846103933092dd51c4f49a962ba03775a07dcf5..53cc2393c626c01d8303a3b9bb63293a1b50593b 100644 (file)
--- a/src/test/regress/sql/jsonb.sql
+++ b/src/test/regress/sql/jsonb.sql
@@ -10,7 +10,8 @@ SELECT '"\v"'::jsonb;         -- ERROR, not a valid JSON escape
  SELECT '"\u"'::jsonb;          -- ERROR, incomplete escape
  SELECT '"\u00"'::jsonb;            -- ERROR, incomplete escape
  SELECT '"\u000g"'::jsonb;      -- ERROR, g is not a hex digit
-SELECT '"\u0000"'::jsonb;      -- OK, legal escape
+SELECT '"\u0045"'::jsonb;      -- OK, legal escape
+SELECT '"\u0000"'::jsonb;      -- ERROR, we don't support U+0000
  -- use octet_length here so we don't get an odd unicode char in the
  -- output
  SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
@@ -73,14 +74,6 @@ SET LOCAL TIME ZONE -8;
  select to_jsonb(timestamptz '2014-05-28 12:22:35.614298-04');
  COMMIT;
  
--- unicode escape - backslash is not escaped
-
-select to_jsonb(text '\uabcd');
-
--- any other backslash is escaped
-
-select to_jsonb(text '\abcd');
-
  --jsonb_agg
  
  CREATE TEMP TABLE rows AS
@@ -488,9 +481,18 @@ SELECT jsonb '{ "a":  "\ud83dX" }' -> 'a'; -- orphan high surrogate
  SELECT jsonb '{ "a":  "\ude04X" }' -> 'a'; -- orphan low surrogate
  
  -- handling of simple unicode escapes
-SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' AS correct_in_utf8;
-SELECT jsonb '{ "a":  "dollar \u0024 character" }' ->> 'a' AS correct_everyWHERE;
-SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' AS not_unescaped;
+
+SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' as correct_in_utf8;
+SELECT jsonb '{ "a":  "dollar \u0024 character" }' as correct_everywhere;
+SELECT jsonb '{ "a":  "dollar \\u0024 character" }' as not_an_escape;
+SELECT jsonb '{ "a":  "null \u0000 escape" }' as fails;
+SELECT jsonb '{ "a":  "null \\u0000 escape" }' as not_an_escape;
+
+SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
+SELECT jsonb '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
+SELECT jsonb '{ "a":  "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' as fails;
+SELECT jsonb '{ "a":  "null \\u0000 escape" }' ->> 'a' as not_an_escape;
  
  -- jsonb_to_record and jsonb_to_recordset
author	Tom Lane
	Fri, 30 Jan 2015 19:44:46 +0000 (14:44 -0500)
committer	Tom Lane
	Fri, 30 Jan 2015 19:44:56 +0000 (14:44 -0500)
doc/src/sgml/json.sgml		patch | blob | blame | history
doc/src/sgml/release-9.4.sgml		patch | blob | blame | history
src/backend/utils/adt/json.c		patch | blob | blame | history
src/test/regress/expected/json.out		patch | blob | blame | history
src/test/regress/expected/json_1.out		patch | blob | blame | history
src/test/regress/expected/jsonb.out		patch | blob | blame | history
src/test/regress/expected/jsonb_1.out		patch | blob | blame | history
src/test/regress/sql/json.sql		patch | blob | blame | history
src/test/regress/sql/jsonb.sql		patch | blob | blame | history