Implement parse_datetime() function
author Alexander Korotkov
Wed, 25 Sep 2019 18:50:55 +0000 (21:50 +0300)
committer Alexander Korotkov
Wed, 25 Sep 2019 19:51:51 +0000 (22:51 +0300)
This commit adds the parse_datetime() function, which implements datetime
parsing with the extended features required by the upcoming jsonpath
.datetime() method:

 * Dynamic type identification based on template string,
 * Support for standard-conforming 'strict' mode,
 * Timezone offset is returned as separate value.

Extracted from original patch by Nikita Glukhov, Teodor Sigaev, Oleg Bartunov.
Revised by me.

Discussion: https://postgr.es/m/fcc6fc6a-b497-f39a-923d-aa34d0c588e8%402ndQuadrant.com
Discussion: https://postgr.es/m/CAPpHfdsZgYEra_PeCLGNoXOWYx6iU-S3wF8aX0ObQUcZU%2B4XTw%40mail.gmail.com
Author: Nikita Glukhov, Teodor Sigaev, Oleg Bartunov, Alexander Korotkov
Reviewed-by: Anastasia Lubennikova, Peter Eisentraut
src/backend/utils/adt/date.c
src/backend/utils/adt/formatting.c
src/include/utils/date.h
src/include/utils/formatting.h

index 4b1afb10f920ae7008376d1795dcba63fa8e8d46..9e291b5c7bc2c8b0769d476a8e3c0ab4e53602bb 100644 (file)
 #endif
 
 
-static int tm2time(struct pg_tm *tm, fsec_t fsec, TimeADT *result);
-static int tm2timetz(struct pg_tm *tm, fsec_t fsec, int tz, TimeTzADT *result);
-static void AdjustTimeForTypmod(TimeADT *time, int32 typmod);
-
-
 /* common code for timetypmodin and timetztypmodin */
 static int32
 anytime_typmodin(bool istz, ArrayType *ta)
@@ -1203,7 +1198,7 @@ time_in(PG_FUNCTION_ARGS)
 /* tm2time()
  * Convert a tm structure to a time data type.
  */
-static int
+int
 tm2time(struct pg_tm *tm, fsec_t fsec, TimeADT *result)
 {
    *result = ((((tm->tm_hour * MINS_PER_HOUR + tm->tm_min) * SECS_PER_MINUTE) + tm->tm_sec)
@@ -1379,7 +1374,7 @@ time_scale(PG_FUNCTION_ARGS)
  * have a fundamental tie together but rather a coincidence of
  * implementation. - thomas
  */
-static void
+void
 AdjustTimeForTypmod(TimeADT *time, int32 typmod)
 {
    static const int64 TimeScales[MAX_TIME_PRECISION + 1] = {
@@ -1957,7 +1952,7 @@ time_part(PG_FUNCTION_ARGS)
 /* tm2timetz()
  * Convert a tm structure to a time data type.
  */
-static int
+int
 tm2timetz(struct pg_tm *tm, fsec_t fsec, int tz, TimeTzADT *result)
 {
    result->time = ((((tm->tm_hour * MINS_PER_HOUR + tm->tm_min) * SECS_PER_MINUTE) + tm->tm_sec) *
index d2f7666eed65e9a2815fb36074e9a97b66156017..462c333544cf026aa9b15d040cfa4cd22cd9a990 100644 (file)
@@ -992,6 +992,11 @@ typedef struct NUMProc
               *L_currency_symbol;
 } NUMProc;
 
+/* Flags describing a format string, as returned by DCH_datetime_type() */
+#define DCH_DATED  0x01    /* format contains date fields */
+#define DCH_TIMED  0x02    /* format contains time-of-day fields */
+#define DCH_ZONED  0x04    /* format contains time zone fields */
+
 /* ----------
  * Functions
  * ----------
@@ -1025,7 +1030,8 @@ static int    from_char_parse_int(int *dest, char **src, FormatNode *node);
 static int seq_search(char *name, const char *const *array, int type, int max, int *len);
 static int from_char_seq_search(int *dest, char **src, const char *const *array, int type, int max, FormatNode *node);
 static void do_to_timestamp(text *date_txt, text *fmt, bool std,
-                           struct pg_tm *tm, fsec_t *fsec, int *fprec);
+                           struct pg_tm *tm, fsec_t *fsec, int *fprec,
+                           uint32 *flags);
 static char *fill_str(char *str, int c, int max);
 static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
 static char *int_to_roman(int number);
@@ -3517,6 +3523,109 @@ DCH_prevent_counter_overflow(void)
    }
 }
 
+/* Scan format nodes up to NODE_TYPE_END; return mask of DCH_DATED/DCH_TIMED/DCH_ZONED. */
+static int
+DCH_datetime_type(FormatNode *node)
+{
+   FormatNode *n;
+   int         flags = 0;  /* OR of DCH_DATED / DCH_TIMED / DCH_ZONED */
+
+   for (n = node; n->type != NODE_TYPE_END; n++)
+   {
+       if (n->type != NODE_TYPE_ACTION)
+           continue;       /* only action nodes are classified */
+
+       switch (n->key->id)
+       {
+           case DCH_FX:
+               break;      /* sets no flag */
+           case DCH_A_M:   /* fields from here to DCH_SSSS mark the format as timed */
+           case DCH_P_M:
+           case DCH_a_m:
+           case DCH_p_m:
+           case DCH_AM:
+           case DCH_PM:
+           case DCH_am:
+           case DCH_pm:
+           case DCH_HH:
+           case DCH_HH12:
+           case DCH_HH24:
+           case DCH_MI:
+           case DCH_SS:
+           case DCH_MS:        /* millisecond */
+           case DCH_US:        /* microsecond */
+           case DCH_FF1:
+           case DCH_FF2:
+           case DCH_FF3:
+           case DCH_FF4:
+           case DCH_FF5:
+           case DCH_FF6:
+           case DCH_SSSS:
+               flags |= DCH_TIMED;
+               break;
+           case DCH_tz:    /* these three zone fields are rejected with an error */
+           case DCH_TZ:
+           case DCH_OF:
+               ereport(ERROR,
+                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                        errmsg("formatting field \"%s\" is only supported in to_char",
+                               n->key->name)));
+               flags |= DCH_ZONED; /* NOTE(review): presumably unreachable after ereport(ERROR) - confirm */
+               break;
+           case DCH_TZH:   /* zone fields that are accepted: mark the format as zoned */
+           case DCH_TZM:
+               flags |= DCH_ZONED;
+               break;
+           case DCH_A_D:   /* fields from here to DCH_J mark the format as dated */
+           case DCH_B_C:
+           case DCH_a_d:
+           case DCH_b_c:
+           case DCH_AD:
+           case DCH_BC:
+           case DCH_ad:
+           case DCH_bc:
+           case DCH_MONTH:
+           case DCH_Month:
+           case DCH_month:
+           case DCH_MON:
+           case DCH_Mon:
+           case DCH_mon:
+           case DCH_MM:
+           case DCH_DAY:
+           case DCH_Day:
+           case DCH_day:
+           case DCH_DY:
+           case DCH_Dy:
+           case DCH_dy:
+           case DCH_DDD:
+           case DCH_IDDD:
+           case DCH_DD:
+           case DCH_D:
+           case DCH_ID:
+           case DCH_WW:
+           case DCH_Q:
+           case DCH_CC:
+           case DCH_Y_YYY:
+           case DCH_YYYY:
+           case DCH_IYYY:
+           case DCH_YYY:
+           case DCH_IYY:
+           case DCH_YY:
+           case DCH_IY:
+           case DCH_Y:
+           case DCH_I:
+           case DCH_RM:
+           case DCH_rm:
+           case DCH_W:
+           case DCH_J:
+               flags |= DCH_DATED;
+               break;
+       }                   /* no default: keys not listed above add no flag */
+   }
+
+   return flags;
+}
+
 /* select a DCHCacheEntry to hold the given format picture */
 static DCHCacheEntry *
 DCH_cache_getnew(const char *str, bool std)
@@ -3808,7 +3917,7 @@ to_timestamp(PG_FUNCTION_ARGS)
    fsec_t      fsec;
    int         fprec;
 
-   do_to_timestamp(date_txt, fmt, false, &tm, &fsec, &fprec);
+   do_to_timestamp(date_txt, fmt, false, &tm, &fsec, &fprec, NULL);
 
    /* Use the specified time zone, if any. */
    if (tm.tm_zone)
@@ -3847,7 +3956,7 @@ to_date(PG_FUNCTION_ARGS)
    struct pg_tm tm;
    fsec_t      fsec;
 
-   do_to_timestamp(date_txt, fmt, false, &tm, &fsec, NULL);
+   do_to_timestamp(date_txt, fmt, false, &tm, &fsec, NULL, NULL);
 
    /* Prevent overflow in Julian-day routines */
    if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
@@ -3868,6 +3977,176 @@ to_date(PG_FUNCTION_ARGS)
    PG_RETURN_DATEADT(result);
 }
 
+/*
+ * Convert 'date_txt' to a datetime value using 'fmt' as the format string;
+ * 'strict' is passed through to do_to_timestamp().  The result type, stored
+ * in '*typid' and '*typmod', is chosen from the date/time/zone components
+ * present in the format string; an unusable combination raises an error.
+ * For zoned formats the offset decoded from the input is stored in '*tz'.
+ */
+Datum
+parse_datetime(text *date_txt, text *fmt, bool strict, Oid *typid,
+              int32 *typmod, int *tz)
+{
+   struct pg_tm tm;
+   fsec_t      fsec;
+   int         fprec = 0;
+   uint32      flags = 0;  /* stays 0 if do_to_timestamp() doesn't set it */
+
+   do_to_timestamp(date_txt, fmt, strict, &tm, &fsec, &fprec, &flags);
+
+   *typmod = fprec ? fprec : -1;   /* fractional part precision */
+
+   if (flags & DCH_DATED)
+   {
+       if (flags & DCH_TIMED)
+       {
+           if (flags & DCH_ZONED)
+           {
+               TimestampTz result;
+
+               if (tm.tm_zone)
+               {
+                   int         dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz);
+
+                   if (dterr)
+                       DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
+               }
+               else
+               {
+                   /*
+                    * Time zone is present in format string, but not in input
+                    * string.  Assuming do_to_timestamp() triggers no error
+                    * this should be possible only in non-strict case.
+                    */
+                   Assert(!strict);
+
+                   ereport(ERROR,
+                           (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+                            errmsg("missing time zone in input string for type timestamptz")));
+               }
+
+               if (tm2timestamp(&tm, fsec, tz, &result) != 0)
+                   ereport(ERROR,
+                           (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+                            errmsg("timestamptz out of range")));
+
+               AdjustTimestampForTypmod(&result, *typmod);
+
+               *typid = TIMESTAMPTZOID;
+               return TimestampTzGetDatum(result);
+           }
+           else
+           {
+               Timestamp   result;
+
+               if (tm2timestamp(&tm, fsec, NULL, &result) != 0)
+                   ereport(ERROR,
+                           (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+                            errmsg("timestamp out of range")));
+
+               AdjustTimestampForTypmod(&result, *typmod);
+
+               *typid = TIMESTAMPOID;
+               return TimestampGetDatum(result);
+           }
+       }
+       else
+       {
+           if (flags & DCH_ZONED)
+           {
+               ereport(ERROR,
+                       (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+                        errmsg("datetime format is zoned but not timed")));
+           }
+           else
+           {
+               DateADT     result;
+
+               /* Prevent overflow in Julian-day routines */
+               if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
+                   ereport(ERROR,
+                           (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+                            errmsg("date out of range: \"%s\"",
+                                   text_to_cstring(date_txt))));
+
+               result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) -
+                   POSTGRES_EPOCH_JDATE;
+
+               /* Now check for just-out-of-range dates */
+               if (!IS_VALID_DATE(result))
+                   ereport(ERROR,
+                           (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+                            errmsg("date out of range: \"%s\"",
+                                   text_to_cstring(date_txt))));
+
+               *typid = DATEOID;
+               return DateADTGetDatum(result);
+           }
+       }
+   }
+   else if (flags & DCH_TIMED)
+   {
+       if (flags & DCH_ZONED)
+       {
+           TimeTzADT  *result = palloc(sizeof(TimeTzADT));
+
+           if (tm.tm_zone)
+           {
+               int         dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz);
+
+               if (dterr)
+                   DateTimeParseError(dterr, text_to_cstring(date_txt), "timetz");
+           }
+           else
+           {
+               /*
+                * Time zone is present in format string, but not in input
+                * string.  Assuming do_to_timestamp() triggers no error this
+                * should be possible only in non-strict case.
+                */
+               Assert(!strict);
+
+               ereport(ERROR,
+                       (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+                        errmsg("missing time zone in input string for type timetz")));
+           }
+
+           if (tm2timetz(&tm, fsec, *tz, result) != 0)
+               ereport(ERROR,
+                       (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+                        errmsg("timetz out of range")));
+
+           AdjustTimeForTypmod(&result->time, *typmod);
+
+           *typid = TIMETZOID;
+           return TimeTzADTPGetDatum(result);
+       }
+       else
+       {
+           TimeADT     result;
+
+           if (tm2time(&tm, fsec, &result) != 0)
+               ereport(ERROR,
+                       (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+                        errmsg("time out of range")));
+
+           AdjustTimeForTypmod(&result, *typmod);
+
+           *typid = TIMEOID;
+           return TimeADTGetDatum(result);
+       }
+   }
+   else
+   {
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+                errmsg("datetime format is not dated and not timed")));
+   }
+
+   return (Datum) 0;
+}
+
 /*
  * do_to_timestamp: shared code for to_timestamp and to_date
  *
@@ -3883,7 +4162,8 @@ to_date(PG_FUNCTION_ARGS)
  */
 static void
 do_to_timestamp(text *date_txt, text *fmt, bool std,
-               struct pg_tm *tm, fsec_t *fsec, int *fprec)
+               struct pg_tm *tm, fsec_t *fsec, int *fprec,
+               uint32 *flags)
 {
    FormatNode *format;
    TmFromChar  tmfc;
@@ -3940,6 +4220,9 @@ do_to_timestamp(text *date_txt, text *fmt, bool std,
 
        pfree(fmt_str);
 
+       if (flags)
+           *flags = DCH_datetime_type(format);
+
        if (!incache)
            pfree(format);
    }
index bec129aff1c945d2f06536950e49600b8a0da4d1..bd15bfa5bb0f985f742b9cc97fcaa9d1e5831fb3 100644 (file)
@@ -76,5 +76,8 @@ extern TimeTzADT *GetSQLCurrentTime(int32 typmod);
 extern TimeADT GetSQLLocalTime(int32 typmod);
 extern int time2tm(TimeADT time, struct pg_tm *tm, fsec_t *fsec);
 extern int timetz2tm(TimeTzADT *time, struct pg_tm *tm, fsec_t *fsec, int *tzp);
+extern int tm2time(struct pg_tm *tm, fsec_t fsec, TimeADT *result);
+extern int tm2timetz(struct pg_tm *tm, fsec_t fsec, int tz, TimeTzADT *result);
+extern void AdjustTimeForTypmod(TimeADT *time, int32 typmod);
 
 #endif                         /* DATE_H */
index 0117144779e1201a3a179faf446088a855338e15..beeaf10c3321206084c4bb382fdf011b0f6b555b 100644 (file)
@@ -26,4 +26,7 @@ extern char *asc_tolower(const char *buff, size_t nbytes);
 extern char *asc_toupper(const char *buff, size_t nbytes);
 extern char *asc_initcap(const char *buff, size_t nbytes);
 
+extern Datum parse_datetime(text *date_txt, text *fmt, bool strict,
+                           Oid *typid, int32 *typmod, int *tz);
+
 #endif