Built-in JSON data type.

author Robert Haas

Tue, 31 Jan 2012 16:48:23 +0000 (11:48 -0500)

committer Robert Haas

Tue, 31 Jan 2012 16:48:23 +0000 (11:48 -0500)
author Robert Haas
Tue, 31 Jan 2012 16:48:23 +0000 (11:48 -0500)
committer Robert Haas
Tue, 31 Jan 2012 16:48:23 +0000 (11:48 -0500)
diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml

index 152ef2f0378e4f932a71f5842b3daeceec94dc9d..58e46b5870170b9d7082b33f42211a8b2ad41701 100644 (file)
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -269,6 +269,12 @@
         
         XML data
        
+
+      
+       json
+       
+       JSON data
+      
       
      
     
@@ -4169,6 +4175,32 @@ SET xmloption TO { DOCUMENT | CONTENT };
     
    
  
+  
+   <acronym>JSON</> Type
+
+   
+    JSON
+   
+
+   
+    The json data type can be used to store JSON data.  Such
+    data can also be stored as text, but the
+    json data type has the advantage of checking that each
+    stored value is a valid JSON value.
+   
+
+   
+    PostgreSQL allows only one server encoding
+    per database.  It is therefore not possible for JSON to conform rigidly
+    to the specification unless the server encoding is UTF-8.  Attempts to
+    directly include characters which cannot be represented in the server
+    encoding will fail; conversely, characters which can be represented in
+    the server encoding but not in UTF-8 will be allowed.
+    \uXXXX escapes are allowed regardless of the server
+    encoding, and are checked only for syntactic correctness.
+   
+  
+
    &array;
  
    &rowtypes;
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c

index e297e9cfb9079be18500d8e425ee6cdfa0f731be..e35ac590301b705605c8f368fb12860425475e54 100644 (file)
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -242,7 +242,7 @@ ExplainResultDesc(ExplainStmt *stmt)
  {
     TupleDesc   tupdesc;
     ListCell   *lc;
-   bool        xml = false;
+   Oid         result_type = TEXTOID;
  
     /* Check for XML format option */
     foreach(lc, stmt->options)
@@ -253,7 +253,12 @@ ExplainResultDesc(ExplainStmt *stmt)
         {
             char       *p = defGetString(opt);
  
-           xml = (strcmp(p, "xml") == 0);
+           if (strcmp(p, "xml") == 0)
+               result_type = XMLOID;
+           else if (strcmp(p, "json") == 0)
+               result_type = JSONOID;
+           else
+               result_type = TEXTOID;
             /* don't "break", as ExplainQuery will use the last value */
         }
     }
@@ -261,7 +266,7 @@ ExplainResultDesc(ExplainStmt *stmt)
     /* Need a tuple descriptor representing a single TEXT or XML column */
     tupdesc = CreateTemplateTupleDesc(1, false);
     TupleDescInitEntry(tupdesc, (AttrNumber) 1, "QUERY PLAN",
-                      xml ? XMLOID : TEXTOID, -1, 0);
+                      result_type, -1, 0);
     return tupdesc;
  }
  
diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile

index 5f968b011fcd637631eaceca082fd75d90c9360d..c635c38f5b82602bd52057bef3727cf44efe4dc7 100644 (file)
--- a/src/backend/utils/adt/Makefile
+++ b/src/backend/utils/adt/Makefile
@@ -18,7 +18,7 @@ endif
  OBJS = acl.o arrayfuncs.o array_userfuncs.o arrayutils.o bool.o \
     cash.o char.o date.o datetime.o datum.o domains.o \
     enum.o float.o format_type.o \
-   geo_ops.o geo_selfuncs.o int.o int8.o like.o lockfuncs.o \
+   geo_ops.o geo_selfuncs.o int.o int8.o json.o like.o lockfuncs.o \
     misc.o nabstime.o name.o numeric.o numutils.o \
     oid.o oracle_compat.o pseudotypes.o rangetypes.o rangetypes_gist.o \
     rowtypes.o regexp.o regproc.o ruleutils.o selfuncs.o \
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c

new file mode 100644 (file)

index 0000000..cbb81d1
--- /dev/null
+++ b/src/backend/utils/adt/json.c
@@ -0,0 +1,665 @@
+/*-------------------------------------------------------------------------
+ *
+ * json.c
+ *     JSON data type support.
+ *
+ * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *   src/backend/utils/adt/json.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "lib/stringinfo.h"
+#include "libpq/pqformat.h"
+#include "mb/pg_wchar.h"
+#include "utils/builtins.h"
+#include "utils/json.h"
+
+typedef enum
+{
+   JSON_VALUE_INVALID,
+   JSON_VALUE_STRING,
+   JSON_VALUE_NUMBER,
+   JSON_VALUE_OBJECT,
+   JSON_VALUE_ARRAY,
+   JSON_VALUE_TRUE,
+   JSON_VALUE_FALSE,
+   JSON_VALUE_NULL
+} JsonValueType;
+
+typedef struct
+{
+   char       *input;
+   char       *token_start;
+   char       *token_terminator;
+   JsonValueType   token_type;
+   int         line_number;
+   char       *line_start;
+} JsonLexContext;
+
+typedef enum
+{
+   JSON_PARSE_VALUE,           /* expecting a value */
+   JSON_PARSE_ARRAY_START,     /* saw '[', expecting value or ']' */
+   JSON_PARSE_ARRAY_NEXT,      /* saw array element, expecting ',' or ']' */
+   JSON_PARSE_OBJECT_START,    /* saw '{', expecting label or '}' */
+   JSON_PARSE_OBJECT_LABEL,    /* saw object label, expecting ':' */
+   JSON_PARSE_OBJECT_NEXT,     /* saw object value, expecting ',' or '}' */
+   JSON_PARSE_OBJECT_COMMA     /* saw object ',', expecting next label */
+} JsonParseState;
+
+typedef struct JsonParseStack
+{
+   JsonParseState  state;
+} JsonParseStack;
+
+typedef enum
+{
+   JSON_STACKOP_NONE,
+   JSON_STACKOP_PUSH,
+   JSON_STACKOP_PUSH_WITH_PUSHBACK,
+   JSON_STACKOP_POP
+} JsonStackOp;
+
+static void json_validate_cstring(char *input);
+static void json_lex(JsonLexContext *lex);
+static void json_lex_string(JsonLexContext *lex);
+static void json_lex_number(JsonLexContext *lex, char *s);
+static void report_parse_error(JsonParseStack *stack, JsonLexContext *lex);
+static void report_invalid_token(JsonLexContext *lex);
+static char *extract_mb_char(char *s);
+
+extern Datum json_in(PG_FUNCTION_ARGS);
+
+/*
+ * Input.
+ */
+Datum
+json_in(PG_FUNCTION_ARGS)
+{
+   char    *text = PG_GETARG_CSTRING(0);
+
+   json_validate_cstring(text);
+
+   PG_RETURN_TEXT_P(cstring_to_text(text));
+}
+
+/*
+ * Output.
+ */
+Datum
+json_out(PG_FUNCTION_ARGS)
+{
+   Datum   txt = PG_GETARG_DATUM(0);
+
+   PG_RETURN_CSTRING(TextDatumGetCString(txt));
+}
+
+/*
+ * Binary send.
+ */
+Datum
+json_send(PG_FUNCTION_ARGS)
+{
+   StringInfoData buf;
+   text   *t = PG_GETARG_TEXT_PP(0);
+
+   pq_begintypsend(&buf);
+   pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
+   PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+/*
+ * Binary receive.
+ */
+Datum
+json_recv(PG_FUNCTION_ARGS)
+{
+   StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
+   text       *result;
+   char       *str;
+   int         nbytes;
+
+   str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
+
+   /*
+    * We need a null-terminated string to pass to json_validate_cstring().
+    * Rather than make a separate copy, make the temporary result one byte
+    * bigger than it needs to be.
+    */
+   result = palloc(nbytes + 1 + VARHDRSZ);
+   SET_VARSIZE(result, nbytes + VARHDRSZ);
+   memcpy(VARDATA(result), str, nbytes);
+   str = VARDATA(result);
+   str[nbytes] = '\0';
+
+   /* Validate it. */
+   json_validate_cstring(str);
+
+   PG_RETURN_TEXT_P(result);
+}
+
+/*
+ * Check whether supplied input is valid JSON.
+ */
+static void
+json_validate_cstring(char *input)
+{
+   JsonLexContext  lex;
+   JsonParseStack *stack,
+                  *stacktop;
+   int             stacksize;
+
+   /* Set up lexing context. */
+   lex.input = input;
+   lex.token_terminator = lex.input;
+   lex.line_number = 1;
+   lex.line_start = input;
+
+   /* Set up parse stack. */
+   stacksize = 32;
+   stacktop = palloc(sizeof(JsonParseStack) * stacksize);
+   stack = stacktop;
+   stack->state = JSON_PARSE_VALUE;
+
+   /* Main parsing loop. */
+   for (;;)
+   {
+       JsonStackOp op;
+
+       /* Fetch next token. */
+       json_lex(&lex);
+
+       /* Check for unexpected end of input. */
+       if (lex.token_start == NULL)
+           report_parse_error(stack, &lex);
+
+redo:
+       /* Figure out what to do with this token. */
+       op = JSON_STACKOP_NONE;
+       switch (stack->state)
+       {
+           case JSON_PARSE_VALUE:
+               if (lex.token_type != JSON_VALUE_INVALID)
+                   op = JSON_STACKOP_POP;
+               else if (lex.token_start[0] == '[')
+                   stack->state = JSON_PARSE_ARRAY_START;
+               else if (lex.token_start[0] == '{')
+                   stack->state = JSON_PARSE_OBJECT_START;
+               else
+                   report_parse_error(stack, &lex);
+               break;
+           case JSON_PARSE_ARRAY_START:
+               if (lex.token_type != JSON_VALUE_INVALID)
+                   stack->state = JSON_PARSE_ARRAY_NEXT;
+               else if (lex.token_start[0] == ']')
+                   op = JSON_STACKOP_POP;
+               else if (lex.token_start[0] == '['
+                   || lex.token_start[0] == '{')
+               {
+                   stack->state = JSON_PARSE_ARRAY_NEXT;
+                   op = JSON_STACKOP_PUSH_WITH_PUSHBACK;
+               }
+               else
+                   report_parse_error(stack, &lex);
+               break;
+           case JSON_PARSE_ARRAY_NEXT:
+               if (lex.token_type != JSON_VALUE_INVALID)
+                   report_parse_error(stack, &lex);
+               else if (lex.token_start[0] == ']')
+                   op = JSON_STACKOP_POP;
+               else if (lex.token_start[0] == ',')
+                   op = JSON_STACKOP_PUSH;
+               else
+                   report_parse_error(stack, &lex);
+               break;
+           case JSON_PARSE_OBJECT_START:
+               if (lex.token_type == JSON_VALUE_STRING)
+                   stack->state = JSON_PARSE_OBJECT_LABEL;
+               else if (lex.token_type == JSON_VALUE_INVALID
+                   && lex.token_start[0] == '}')
+                   op = JSON_STACKOP_POP;
+               else
+                   report_parse_error(stack, &lex);
+               break;
+           case JSON_PARSE_OBJECT_LABEL:
+               if (lex.token_type == JSON_VALUE_INVALID
+                   && lex.token_start[0] == ':')
+               {
+                   stack->state = JSON_PARSE_OBJECT_NEXT;
+                   op = JSON_STACKOP_PUSH;
+               }
+               else
+                   report_parse_error(stack, &lex);
+               break;
+           case JSON_PARSE_OBJECT_NEXT:
+               if (lex.token_type != JSON_VALUE_INVALID)
+                   report_parse_error(stack, &lex);
+               else if (lex.token_start[0] == '}')
+                   op = JSON_STACKOP_POP;
+               else if (lex.token_start[0] == ',')
+                   stack->state = JSON_PARSE_OBJECT_COMMA;
+               else
+                   report_parse_error(stack, &lex);
+               break;
+           case JSON_PARSE_OBJECT_COMMA:
+               if (lex.token_type == JSON_VALUE_STRING)
+                   stack->state = JSON_PARSE_OBJECT_LABEL;
+               else
+                   report_parse_error(stack, &lex);
+               break;
+           default:
+               elog(ERROR, "unexpected json parse state: %d",
+                       (int) stack->state);
+       }
+
+       /* Push or pop the stack, if needed. */
+       switch (op)
+       {
+           case JSON_STACKOP_PUSH:
+           case JSON_STACKOP_PUSH_WITH_PUSHBACK:
+               ++stack;
+               if (stack >= &stacktop[stacksize])
+               {
+                   int     stackoffset = stack - stacktop;
+                   stacksize = stacksize + 32;
+                   stacktop = repalloc(stacktop,
+                                       sizeof(JsonParseStack) * stacksize);
+                   stack = stacktop + stackoffset;
+               }
+               stack->state = JSON_PARSE_VALUE;
+               if (op == JSON_STACKOP_PUSH_WITH_PUSHBACK)
+                   goto redo;
+               break;
+           case JSON_STACKOP_POP:
+               if (stack == stacktop)
+               {
+                   /* Expect end of input. */
+                   json_lex(&lex);
+                   if (lex.token_start != NULL)
+                       report_parse_error(NULL, &lex);
+                   return;
+               }
+               --stack;
+               break;
+           case JSON_STACKOP_NONE:
+               /* nothing to do */
+               break;
+       }
+   }
+}
+
+/*
+ * Lex one token from the input stream.
+ */
+static void
+json_lex(JsonLexContext *lex)
+{
+   char       *s;
+
+   /* Skip leading whitespace. */
+   s = lex->token_terminator;
+   while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r')
+   {
+       if (*s == '\n')
+           ++lex->line_number;
+       ++s;
+   }
+   lex->token_start = s;
+
+   /* Determine token type. */
+   if (strchr("{}[],:", s[0]))
+   {
+       /* strchr() doesn't return false on a NUL input. */
+       if (s[0] == '\0')
+       {
+           /* End of string. */
+           lex->token_start = NULL;
+           lex->token_terminator = NULL;
+       }
+       else
+       {
+           /* Single-character token, some kind of punctuation mark. */
+           lex->token_terminator = s + 1;
+       }
+       lex->token_type = JSON_VALUE_INVALID;
+   }
+   else if (*s == '"')
+   {
+       /* String. */
+       json_lex_string(lex);
+       lex->token_type = JSON_VALUE_STRING;
+   }
+   else if (*s == '-')
+   {
+       /* Negative number. */
+       json_lex_number(lex, s + 1);
+       lex->token_type = JSON_VALUE_NUMBER;
+   }
+   else if (*s >= '0' && *s <= '9')
+   {
+       /* Positive number. */
+       json_lex_number(lex, s);
+       lex->token_type = JSON_VALUE_NUMBER;
+   }
+   else
+   {
+       char   *p;
+
+       /*
+        * We're not dealing with a string, number, legal punctuation mark,
+        * or end of string.  The only legal tokens we might find here are
+        * true, false, and null, but for error reporting purposes we scan
+        * until we see a non-alphanumeric character.  That way, we can report
+        * the whole word as an unexpected token, rather than just some
+        * unintuitive prefix thereof.
+        */
+       for (p = s; (*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z')
+           || (*p >= '0' && *p <= '9') || *p == '_' || IS_HIGHBIT_SET(*p);
+           ++p)
+           ;
+
+       /*
+        * We got some sort of unexpected punctuation or an otherwise
+        * unexpected character, so just complain about that one character.
+        */
+       if (p == s)
+       {
+           lex->token_terminator = s + 1;
+           report_invalid_token(lex);
+       }
+
+       /*
+        * We've got a real alphanumeric token here.  If it happens to be
+        * true, false, or null, all is well.  If not, error out.
+        */
+       lex->token_terminator = p;
+       if (p - s == 4)
+       {
+           if (memcmp(s, "true", 4) == 0)
+               lex->token_type = JSON_VALUE_TRUE;
+           else if (memcmp(s, "null", 4) == 0)
+               lex->token_type = JSON_VALUE_NULL;
+           else
+               report_invalid_token(lex);
+       }
+       else if (p - s == 5 && memcmp(s, "false", 5) == 0)
+           lex->token_type = JSON_VALUE_FALSE;
+       else
+           report_invalid_token(lex);
+   }
+}
+
+/*
+ * The next token in the input stream is known to be a string; lex it.
+ */
+static void
+json_lex_string(JsonLexContext *lex)
+{
+   char       *s = lex->token_start + 1;
+
+   for (s = lex->token_start + 1; *s != '"'; ++s)
+   {
+       /* Per RFC4627, these characters MUST be escaped. */
+       if (*s < 32)
+       {
+           /* A NUL byte marks the (premature) end of the string. */
+           if (*s == '\0')
+           {
+               lex->token_terminator = s;
+               report_invalid_token(lex);
+           }
+           ereport(ERROR,
+                   (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                    errmsg("invalid input syntax for type json"),
+                    errdetail_internal("line %d: Character \"%c\" must be escaped.",
+                       lex->line_number, *s)));
+       }
+       else if (*s == '\\')
+       {
+           /* OK, we have an escape character. */
+           ++s;
+           if (*s == '\0')
+           {
+               lex->token_terminator = s;
+               report_invalid_token(lex);
+           }
+           else if (*s == 'u')
+           {
+               int     i;
+               int     ch = 0;
+
+               for (i = 1; i <= 4; ++i)
+               {
+                   if (s[i] == '\0')
+                   {
+                       lex->token_terminator = s + i;
+                       report_invalid_token(lex);
+                   }
+                   else if (s[i] >= '0' && s[i] <= '9')
+                       ch = (ch * 16) + (s[i] - '0');
+                   else if (s[i] >= 'a' && s[i] <= 'f')
+                       ch = (ch * 16) + (s[i] - 'a') + 10;
+                   else if (s[i] >= 'A' && s[i] <= 'F')
+                       ch = (ch * 16) + (s[i] - 'A') + 10;
+                   else
+                   {
+                       ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                errmsg("invalid input syntax for type json"),
+                                errdetail_internal("line %d: \"\\u\" must be followed by four hexadecimal digits.",
+                                   lex->line_number)));
+                   }
+               }
+
+               /* Account for the four additional bytes we just parsed. */
+               s += 4;
+           }
+           else if (!strchr("\"\\/bfnrt", *s))
+           {
+               /* Error out. */
+               ereport(ERROR,
+                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                        errmsg("invalid input syntax for type json"),
+                        errdetail_internal("line %d: Invalid escape \"\\%s\".",
+                           lex->line_number, extract_mb_char(s))));
+           }
+       }
+   }
+
+   /* Hooray, we found the end of the string! */
+   lex->token_terminator = s + 1;
+}
+
+/*-------------------------------------------------------------------------
+ * The next token in the input stream is known to be a number; lex it.
+ *
+ * In JSON, a number consists of four parts:
+ *
+ * (1) An optional minus sign ('-').
+ *
+ * (2) Either a single '0', or a string of one or more digits that does not
+ *     begin with a '0'.
+ *
+ * (3) An optional decimal part, consisting of a period ('.') followed by
+ *     one or more digits.  (Note: While this part can be omitted
+ *     completely, it's not OK to have only the decimal point without
+ *     any digits afterwards.)
+ *
+ * (4) An optional exponent part, consisting of 'e' or 'E', optionally
+ *     followed by '+' or '-', followed by one or more digits.  (Note:
+ *     As with the decimal part, if 'e' or 'E' is present, it must be
+ *     followed by at least one digit.)
+ *
+ * The 's' argument to this function points to the ostensible beginning
+ * of part 2 - i.e. the character after any optional minus sign, and the
+ * first character of the string if there is none.
+ *
+ *-------------------------------------------------------------------------
+ */
+static void
+json_lex_number(JsonLexContext *lex, char *s)
+{
+   bool    error = false;
+   char   *p;
+
+   /* Part (1): leading sign indicator. */
+   /* Caller already did this for us; so do nothing. */
+
+   /* Part (2): parse main digit string. */
+   if (*s == '0')
+       ++s;
+   else if (*s >= '1' && *s <= '9')
+   {
+       do
+       {
+           ++s;
+       } while (*s >= '0' && *s <= '9');
+   }
+   else
+       error = true;
+
+   /* Part (3): parse optional decimal portion. */
+   if (*s == '.')
+   {
+       ++s;
+       if (*s < '0' && *s > '9')
+           error = true;
+       else
+       {
+           do
+           {
+               ++s;
+           } while (*s >= '0' && *s <= '9');
+       }
+   }
+
+   /* Part (4): parse optional exponent. */
+   if (*s == 'e' || *s == 'E')
+   {
+       ++s;
+       if (*s == '+' || *s == '-')
+           ++s;
+       if (*s < '0' && *s > '9')
+           error = true;
+       else
+       {
+           do
+           {
+               ++s;
+           } while (*s >= '0' && *s <= '9');
+       }
+   }
+
+   /* Check for trailing garbage. */
+   for (p = s; (*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z')
+       || (*p >= '0' && *p <= '9') || *p == '_' || IS_HIGHBIT_SET(*p); ++p)
+       ;
+   lex->token_terminator = p;
+   if (p > s || error)
+       report_invalid_token(lex);
+}
+
+/*
+ * Report a parse error.
+ */
+static void
+report_parse_error(JsonParseStack *stack, JsonLexContext *lex)
+{
+   char   *detail = NULL;
+   char   *token = NULL;
+   int     toklen;
+
+   /* Handle case where the input ended prematurely. */
+   if (lex->token_start == NULL)
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                errmsg("invalid input syntax for type json: \"%s\"",
+                   lex->input),
+                errdetail_internal("The input string ended unexpectedly.")));
+
+   /* Work out the offending token. */
+   toklen = lex->token_terminator - lex->token_start;
+   token = palloc(toklen + 1);
+   memcpy(token, lex->token_start, toklen);
+   token[toklen] = '\0';
+
+   /* Select correct detail message. */
+   if (stack == NULL)
+       detail = "line %d: Expected end of input, but found \"%s\".";
+   else
+   {
+       switch (stack->state)
+       {
+           case JSON_PARSE_VALUE:
+               detail = "line %d: Expected string, number, object, array, true, false, or null, but found \"%s\".";
+               break;
+           case JSON_PARSE_ARRAY_START:
+               detail = "line %d: Expected array element or \"]\", but found \"%s\".";
+               break;
+           case JSON_PARSE_ARRAY_NEXT:
+               detail = "line %d: Expected \",\" or \"]\", but found \"%s\".";
+               break;
+           case JSON_PARSE_OBJECT_START:
+               detail = "line %d: Expected string or \"}\", but found \"%s\".";
+               break;
+           case JSON_PARSE_OBJECT_LABEL:
+               detail = "line %d: Expected \":\", but found \"%s\".";
+               break;
+           case JSON_PARSE_OBJECT_NEXT:
+               detail = "line %d: Expected \",\" or \"}\", but found \"%s\".";
+               break;
+           case JSON_PARSE_OBJECT_COMMA:
+               detail = "line %d: Expected string, but found \"%s\".";
+               break;
+       }
+   }
+
+   ereport(ERROR,
+           (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+            errmsg("invalid input syntax for type json: \"%s\"",
+               lex->input),
+            errdetail_internal(detail, lex->line_number, token)));
+}
+
+/*
+ * Report an invalid input token.
+ */
+static void
+report_invalid_token(JsonLexContext *lex)
+{
+   char   *token;
+   int     toklen;
+
+   toklen = lex->token_terminator - lex->token_start;
+   token = palloc(toklen + 1);
+   memcpy(token, lex->token_start, toklen);
+   token[toklen] = '\0';
+
+   ereport(ERROR,
+           (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+            errmsg("invalid input syntax for type json"),
+            errdetail_internal("line %d: Token \"%s\" is invalid.",
+               lex->line_number, token)));
+}
+
+/*
+ * Extract a single, possibly multi-byte char from the input string.
+ */
+static char *
+extract_mb_char(char *s)
+{
+   char   *res;
+   int     len;
+
+   len = pg_mblen(s);
+   res = palloc(len + 1);
+   memcpy(res, s, len);
+   res[len] = '\0';
+
+   return res;
+}
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h

index 96551ccae9e0e73cee85f111f77c9963283fa089..5dc6d05478f644e5dbba5d773ec75d8d02aae2ce 100644 (file)
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -4022,6 +4022,16 @@ DESCR("determine if a string is well formed XML document");
  DATA(insert OID = 3053 (  xml_is_well_formed_content    PGNSP PGUID 12 1 0 0 0 f f f t f i 1 0 16 "25" _null_ _null_ _null_ _null_ xml_is_well_formed_content _null_ _null_ _null_ ));
  DESCR("determine if a string is well formed XML content");
  
+/* json */
+DATA(insert OID = 321 (  json_in          PGNSP PGUID 12 1 0 0 0 f f f t f s 1 0 114 "2275" _null_ _null_ _null_ _null_ json_in _null_ _null_ _null_ ));
+DESCR("I/O");
+DATA(insert OID = 322 (  json_out         PGNSP PGUID 12 1 0 0 0 f f f t f i 1 0 2275 "114" _null_ _null_ _null_ _null_ json_out _null_ _null_ _null_ ));
+DESCR("I/O");
+DATA(insert OID = 323 (  json_recv        PGNSP PGUID 12 1 0 0 0 f f f t f s 1 0 114 "2281" _null_ _null_ _null_ _null_    json_recv _null_ _null_ _null_ ));
+DESCR("I/O");
+DATA(insert OID = 324 (  json_send        PGNSP PGUID 12 1 0 0 0 f f f t f s 1 0 17 "114" _null_ _null_ _null_ _null_ json_send _null_ _null_ _null_ ));
+DESCR("I/O");
+
  /* uuid */
  DATA(insert OID = 2952 (  uuid_in         PGNSP PGUID 12 1 0 0 0 f f f t f i 1 0 2950 "2275" _null_ _null_ _null_ _null_ uuid_in _null_ _null_ _null_ ));
  DESCR("I/O");
diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h

index d9855a79f8621f9a38a4531d2000e6b5c7aa9a56..e4bca552973a097b9582ef9872ab44ff80b42d5c 100644 (file)
--- a/src/include/catalog/pg_type.h
+++ b/src/include/catalog/pg_type.h
@@ -352,10 +352,13 @@ DATA(insert OID = 81 (    pg_proc         PGNSP PGUID -1 f c C f t \054 1255 0 0 record_i
  DATA(insert OID = 83 ( pg_class        PGNSP PGUID -1 f c C f t \054 1259 0 0 record_in record_out record_recv record_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ ));
  
  /* OIDS 100 - 199 */
+DATA(insert OID = 114 ( json          PGNSP PGUID -1 f b U f t \054 0 0 199 json_in json_out json_recv json_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ ));
+#define JSONOID 114
  DATA(insert OID = 142 ( xml           PGNSP PGUID -1 f b U f t \054 0 0 143 xml_in xml_out xml_recv xml_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ ));
  DESCR("XML content");
  #define XMLOID 142
  DATA(insert OID = 143 ( _xml      PGNSP PGUID -1 f b A f t \054 0 142 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ ));
+DATA(insert OID = 199 ( _json     PGNSP PGUID -1 f b A f t \054 0 114 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ ));
  
  DATA(insert OID = 194 ( pg_node_tree   PGNSP PGUID -1 f b S f t \054 0 0 0 pg_node_tree_in pg_node_tree_out pg_node_tree_recv pg_node_tree_send - - - i x f 0 -1 0 100 _null_ _null_ _null_ ));
  DESCR("string representing an internal node tree");
diff --git a/src/include/utils/json.h b/src/include/utils/json.h

new file mode 100644 (file)

index 0000000..ee87fd6
--- /dev/null
+++ b/src/include/utils/json.h
@@ -0,0 +1,24 @@
+/*-------------------------------------------------------------------------
+ *
+ * json.h
+ *   Declarations for JSON data type support.
+ *
+ * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/json.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef JSON_H
+#define JSON_H
+
+#include "fmgr.h"
+
+extern Datum json_in(PG_FUNCTION_ARGS);
+extern Datum json_out(PG_FUNCTION_ARGS);
+extern Datum json_recv(PG_FUNCTION_ARGS);
+extern Datum json_send(PG_FUNCTION_ARGS);
+
+#endif   /* XML_H */
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out

new file mode 100644 (file)

index 0000000..5ef65f7
--- /dev/null
+++ b/src/test/regress/expected/json.out
@@ -0,0 +1,258 @@
+-- Strings.
+SELECT '""'::json;             -- OK.
+ json 
+------
+ ""
+(1 row)
+
+SELECT $$''$$::json;           -- ERROR, single quotes are not allowed
+ERROR:  invalid input syntax for type json
+LINE 1: SELECT $$''$$::json;
+               ^
+DETAIL:  line 1: Token "'" is invalid.
+SELECT '"abc"'::json;          -- OK
+ json  
+-------
+ "abc"
+(1 row)
+
+SELECT '"abc'::json;           -- ERROR, quotes not closed
+ERROR:  invalid input syntax for type json
+LINE 1: SELECT '"abc'::json;
+               ^
+DETAIL:  line 1: Token ""abc" is invalid.
+SELECT '"abc
+def"'::json;                   -- ERROR, unescaped newline in string constant
+ERROR:  invalid input syntax for type json
+LINE 1: SELECT '"abc
+               ^
+DETAIL:  line 1: Character "
+" must be escaped.
+SELECT '"\n\"\\"'::json;       -- OK, legal escapes
+   json   
+----------
+ "\n\"\\"
+(1 row)
+
+SELECT '"\v"'::json;           -- ERROR, not a valid JSON escape
+ERROR:  invalid input syntax for type json
+LINE 1: SELECT '"\v"'::json;
+               ^
+DETAIL:  line 1: Invalid escape "\v".
+SELECT '"\u"'::json;           -- ERROR, incomplete escape
+ERROR:  invalid input syntax for type json
+LINE 1: SELECT '"\u"'::json;
+               ^
+DETAIL:  line 1: "\u" must be followed by four hexadecimal digits.
+SELECT '"\u00"'::json;         -- ERROR, incomplete escape
+ERROR:  invalid input syntax for type json
+LINE 1: SELECT '"\u00"'::json;
+               ^
+DETAIL:  line 1: "\u" must be followed by four hexadecimal digits.
+SELECT '"\u000g"'::json;       -- ERROR, g is not a hex digit
+ERROR:  invalid input syntax for type json
+LINE 1: SELECT '"\u000g"'::json;
+               ^
+DETAIL:  line 1: "\u" must be followed by four hexadecimal digits.
+SELECT '"\u0000"'::json;       -- OK, legal escape
+   json   
+----------
+ "\u0000"
+(1 row)
+
+SELECT '"\uaBcD"'::json;       -- OK, uppercase and lower case both OK
+   json   
+----------
+ "\uaBcD"
+(1 row)
+
+-- Numbers.
+SELECT '1'::json;              -- OK
+ json 
+------
+ 1
+(1 row)
+
+SELECT '0'::json;              -- OK
+ json 
+------
+ 0
+(1 row)
+
+SELECT '01'::json;             -- ERROR, not valid according to JSON spec
+ERROR:  invalid input syntax for type json
+LINE 1: SELECT '01'::json;
+               ^
+DETAIL:  line 1: Token "01" is invalid.
+SELECT '0.1'::json;                -- OK
+ json 
+------
+ 0.1
+(1 row)
+
+SELECT '9223372036854775808'::json;    -- OK, even though it's too large for int8
+        json         
+---------------------
+ 9223372036854775808
+(1 row)
+
+SELECT '1e100'::json;          -- OK
+ json  
+-------
+ 1e100
+(1 row)
+
+SELECT '1.3e100'::json;            -- OK
+  json   
+---------
+ 1.3e100
+(1 row)
+
+SELECT '1f2'::json;                -- ERROR
+ERROR:  invalid input syntax for type json
+LINE 1: SELECT '1f2'::json;
+               ^
+DETAIL:  line 1: Token "1f2" is invalid.
+-- Arrays.
+SELECT '[]'::json;             -- OK
+ json 
+------
+ []
+(1 row)
+
+SELECT '[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]'::json;  -- OK
+                                                                                                   json                                                                                                   
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
+(1 row)
+
+SELECT '[1,2]'::json;          -- OK
+ json  
+-------
+ [1,2]
+(1 row)
+
+SELECT '[1,2,]'::json;         -- ERROR, trailing comma
+ERROR:  invalid input syntax for type json: "[1,2,]"
+LINE 1: SELECT '[1,2,]'::json;
+               ^
+DETAIL:  line 1: Expected string, number, object, array, true, false, or null, but found "]".
+SELECT '[1,2'::json;           -- ERROR, no closing bracket
+ERROR:  invalid input syntax for type json: "[1,2"
+LINE 1: SELECT '[1,2'::json;
+               ^
+DETAIL:  The input string ended unexpectedly.
+SELECT '[1,[2]'::json;         -- ERROR, no closing bracket
+ERROR:  invalid input syntax for type json: "[1,[2]"
+LINE 1: SELECT '[1,[2]'::json;
+               ^
+DETAIL:  The input string ended unexpectedly.
+-- Objects.
+SELECT '{}'::json;             -- OK
+ json 
+------
+ {}
+(1 row)
+
+SELECT '{"abc"}'::json;            -- ERROR, no value
+ERROR:  invalid input syntax for type json: "{"abc"}"
+LINE 1: SELECT '{"abc"}'::json;
+               ^
+DETAIL:  line 1: Expected ":", but found "}".
+SELECT '{"abc":1}'::json;      -- OK
+   json    
+-----------
+ {"abc":1}
+(1 row)
+
+SELECT '{1:"abc"}'::json;      -- ERROR, keys must be strings
+ERROR:  invalid input syntax for type json: "{1:"abc"}"
+LINE 1: SELECT '{1:"abc"}'::json;
+               ^
+DETAIL:  line 1: Expected string or "}", but found "1".
+SELECT '{"abc",1}'::json;      -- ERROR, wrong separator
+ERROR:  invalid input syntax for type json: "{"abc",1}"
+LINE 1: SELECT '{"abc",1}'::json;
+               ^
+DETAIL:  line 1: Expected ":", but found ",".
+SELECT '{"abc"=1}'::json;      -- ERROR, totally wrong separator
+ERROR:  invalid input syntax for type json
+LINE 1: SELECT '{"abc"=1}'::json;
+               ^
+DETAIL:  line 1: Token "=" is invalid.
+SELECT '{"abc"::1}'::json;     -- ERROR, another wrong separator
+ERROR:  invalid input syntax for type json: "{"abc"::1}"
+LINE 1: SELECT '{"abc"::1}'::json;
+               ^
+DETAIL:  line 1: Expected string, number, object, array, true, false, or null, but found ":".
+SELECT '{"abc":1,"def":2,"ghi":[3,4],"hij":{"klm":5,"nop":[6]}}'::json; -- OK
+                          json                           
+---------------------------------------------------------
+ {"abc":1,"def":2,"ghi":[3,4],"hij":{"klm":5,"nop":[6]}}
+(1 row)
+
+SELECT '{"abc":1:2}'::json;        -- ERROR, colon in wrong spot
+ERROR:  invalid input syntax for type json: "{"abc":1:2}"
+LINE 1: SELECT '{"abc":1:2}'::json;
+               ^
+DETAIL:  line 1: Expected "," or "}", but found ":".
+SELECT '{"abc":1,3}'::json;        -- ERROR, no value
+ERROR:  invalid input syntax for type json: "{"abc":1,3}"
+LINE 1: SELECT '{"abc":1,3}'::json;
+               ^
+DETAIL:  line 1: Expected string, but found "3".
+-- Miscellaneous stuff.
+SELECT 'true'::json;           -- OK
+ json 
+------
+ true
+(1 row)
+
+SELECT 'false'::json;          -- OK
+ json  
+-------
+ false
+(1 row)
+
+SELECT 'null'::json;           -- OK
+ json 
+------
+ null
+(1 row)
+
+SELECT ' true '::json;         -- OK, even with extra whitespace
+  json  
+--------
+  true 
+(1 row)
+
+SELECT 'true false'::json;     -- ERROR, too many values
+ERROR:  invalid input syntax for type json: "true false"
+LINE 1: SELECT 'true false'::json;
+               ^
+DETAIL:  line 1: Expected end of input, but found "false".
+SELECT 'true, false'::json;        -- ERROR, too many values
+ERROR:  invalid input syntax for type json: "true, false"
+LINE 1: SELECT 'true, false'::json;
+               ^
+DETAIL:  line 1: Expected end of input, but found ",".
+SELECT 'truf'::json;           -- ERROR, not a keyword
+ERROR:  invalid input syntax for type json
+LINE 1: SELECT 'truf'::json;
+               ^
+DETAIL:  line 1: Token "truf" is invalid.
+SELECT 'trues'::json;          -- ERROR, not a keyword
+ERROR:  invalid input syntax for type json
+LINE 1: SELECT 'trues'::json;
+               ^
+DETAIL:  line 1: Token "trues" is invalid.
+SELECT ''::json;               -- ERROR, no value
+ERROR:  invalid input syntax for type json: ""
+LINE 1: SELECT ''::json;
+               ^
+DETAIL:  The input string ended unexpectedly.
+SELECT '    '::json;           -- ERROR, no value
+ERROR:  invalid input syntax for type json: "    "
+LINE 1: SELECT '    '::json;
+               ^
+DETAIL:  The input string ended unexpectedly.
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule

index 3bedad07c73438dc230d78cafb8482673c6d5458..d6105f8567884c9da566f059f4eb2ffac0545d0e 100644 (file)
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -92,7 +92,7 @@ test: rules
  # ----------
  # Another group of parallel tests
  # ----------
-test: select_views portals_p2 foreign_key cluster dependency guc bitmapops combocid tsearch tsdicts foreign_data window xmlmap functional_deps advisory_lock
+test: select_views portals_p2 foreign_key cluster dependency guc bitmapops combocid tsearch tsdicts foreign_data window xmlmap functional_deps advisory_lock json
  
  # ----------
  # Another group of parallel tests
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule

index 0b64569849c3e0307398422570a98b40ea6744f1..90726ce97b3ded82c1367f01fa4dd96f87a2d2ed 100644 (file)
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -109,6 +109,7 @@ test: window
  test: xmlmap
  test: functional_deps
  test: advisory_lock
+test: json
  test: plancache
  test: limit
  test: plpgsql
diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql

new file mode 100644 (file)

index 0000000..440398b
--- /dev/null
+++ b/src/test/regress/sql/json.sql
@@ -0,0 +1,56 @@
+-- Strings.
+SELECT '""'::json;             -- OK.
+SELECT $$''$$::json;           -- ERROR, single quotes are not allowed
+SELECT '"abc"'::json;          -- OK
+SELECT '"abc'::json;           -- ERROR, quotes not closed
+SELECT '"abc
+def"'::json;                   -- ERROR, unescaped newline in string constant
+SELECT '"\n\"\\"'::json;       -- OK, legal escapes
+SELECT '"\v"'::json;           -- ERROR, not a valid JSON escape
+SELECT '"\u"'::json;           -- ERROR, incomplete escape
+SELECT '"\u00"'::json;         -- ERROR, incomplete escape
+SELECT '"\u000g"'::json;       -- ERROR, g is not a hex digit
+SELECT '"\u0000"'::json;       -- OK, legal escape
+SELECT '"\uaBcD"'::json;       -- OK, uppercase and lower case both OK
+
+-- Numbers.
+SELECT '1'::json;              -- OK
+SELECT '0'::json;              -- OK
+SELECT '01'::json;             -- ERROR, not valid according to JSON spec
+SELECT '0.1'::json;                -- OK
+SELECT '9223372036854775808'::json;    -- OK, even though it's too large for int8
+SELECT '1e100'::json;          -- OK
+SELECT '1.3e100'::json;            -- OK
+SELECT '1f2'::json;                -- ERROR
+
+-- Arrays.
+SELECT '[]'::json;             -- OK
+SELECT '[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]'::json;  -- OK
+SELECT '[1,2]'::json;          -- OK
+SELECT '[1,2,]'::json;         -- ERROR, trailing comma
+SELECT '[1,2'::json;           -- ERROR, no closing bracket
+SELECT '[1,[2]'::json;         -- ERROR, no closing bracket
+
+-- Objects.
+SELECT '{}'::json;             -- OK
+SELECT '{"abc"}'::json;            -- ERROR, no value
+SELECT '{"abc":1}'::json;      -- OK
+SELECT '{1:"abc"}'::json;      -- ERROR, keys must be strings
+SELECT '{"abc",1}'::json;      -- ERROR, wrong separator
+SELECT '{"abc"=1}'::json;      -- ERROR, totally wrong separator
+SELECT '{"abc"::1}'::json;     -- ERROR, another wrong separator
+SELECT '{"abc":1,"def":2,"ghi":[3,4],"hij":{"klm":5,"nop":[6]}}'::json; -- OK
+SELECT '{"abc":1:2}'::json;        -- ERROR, colon in wrong spot
+SELECT '{"abc":1,3}'::json;        -- ERROR, no value
+
+-- Miscellaneous stuff.
+SELECT 'true'::json;           -- OK
+SELECT 'false'::json;          -- OK
+SELECT 'null'::json;           -- OK
+SELECT ' true '::json;         -- OK, even with extra whitespace
+SELECT 'true false'::json;     -- ERROR, too many values
+SELECT 'true, false'::json;        -- ERROR, too many values
+SELECT 'truf'::json;           -- ERROR, not a keyword
+SELECT 'trues'::json;          -- ERROR, not a keyword
+SELECT ''::json;               -- ERROR, no value
+SELECT '    '::json;           -- ERROR, no value
author	Robert Haas
	Tue, 31 Jan 2012 16:48:23 +0000 (11:48 -0500)
committer	Robert Haas
	Tue, 31 Jan 2012 16:48:23 +0000 (11:48 -0500)
doc/src/sgml/datatype.sgml		patch \| blob \| blame \| history
src/backend/commands/explain.c		patch \| blob \| blame \| history
src/backend/utils/adt/Makefile		patch \| blob \| blame \| history
src/backend/utils/adt/json.c	[new file with mode: 0644]	patch \| blob
src/include/catalog/pg_proc.h		patch \| blob \| blame \| history
src/include/catalog/pg_type.h		patch \| blob \| blame \| history
src/include/utils/json.h	[new file with mode: 0644]	patch \| blob
src/test/regress/expected/json.out	[new file with mode: 0644]	patch \| blob
src/test/regress/parallel_schedule		patch \| blob \| blame \| history
src/test/regress/serial_schedule		patch \| blob \| blame \| history
src/test/regress/sql/json.sql	[new file with mode: 0644]	patch \| blob