From 4452000f310b8c1c947ee724618c1bc31ed20242 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 26 Jul 2016 15:25:02 -0400 Subject: [PATCH] Fix constant-folding of ROW(...) IS [NOT] NULL with composite fields. The SQL standard appears to specify that IS [NOT] NULL's tests of field nullness are non-recursive, ie, we shouldn't consider that a composite field with value ROW(NULL,NULL) is null for this purpose. ExecEvalNullTest got this right, but eval_const_expressions did not, leading to weird inconsistencies depending on whether the expression was such that the planner could apply constant folding. Also, adjust the docs to mention that IS [NOT] DISTINCT FROM NULL can be used as a substitute test if a simple null check is wanted for a rowtype argument. That motivated reordering things so that IS [NOT] DISTINCT FROM is described before IS [NOT] NULL. In HEAD, I went a bit further and added a table showing all the comparison-related predicates. Per bug #14235. Back-patch to all supported branches, since it's certainly undesirable that constant-folding should change the semantics. Report and patch by Andrew Gierth; assorted wordsmithing and revised regression test cases by me. Report: <20160708024746.1410.57282@wrigleys.postgresql.org> --- doc/src/sgml/func.sgml | 199 ++++++++++++++++++------- src/backend/executor/execQual.c | 15 ++ src/backend/optimizer/util/clauses.c | 13 +- src/test/regress/expected/rowtypes.out | 54 +++++++ src/test/regress/sql/rowtypes.sql | 24 +++ 5 files changed, 250 insertions(+), 55 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index baef80db5bd..71aae2f4164 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -256,12 +256,111 @@ 3). + + There are also some comparison predicates, as shown in . These behave much like + operators, but have special syntax mandated by the SQL standard. + + + + Comparison Predicates + + + + Predicate + Description + + + + + + a BETWEEN x AND y + between + + + + a NOT BETWEEN x AND y + not between + + + + a BETWEEN SYMMETRIC x AND y + between, after sorting the comparison values + + + + a NOT BETWEEN SYMMETRIC x AND y + not between, after sorting the comparison values + + + + a IS DISTINCT FROM b + not equal, treating null like an ordinary value + + + + a IS NOT DISTINCT FROM b + equal, treating null like an ordinary value + + + + expression IS NULL + is null + + + + expression IS NOT NULL + is not null + + + + expression ISNULL + is null (nonstandard syntax) + + + + expression NOTNULL + is not null (nonstandard syntax) + + + + boolean_expression IS TRUE + is true + + + + boolean_expression IS NOT TRUE + is false or unknown + + + + boolean_expression IS FALSE + is false + + + + boolean_expression IS NOT FALSE + is true or unknown + + + + boolean_expression IS UNKNOWN + is unknown + + + + boolean_expression IS NOT UNKNOWN + is true or false + + + +
+ BETWEEN - In addition to the comparison operators, the special - BETWEEN construct is available: + The BETWEEN predicate simplifies range tests: a BETWEEN x AND y @@ -282,13 +381,39 @@ BETWEEN SYMMETRIC - BETWEEN SYMMETRIC is the same as BETWEEN + BETWEEN SYMMETRIC is like BETWEEN except there is no requirement that the argument to the left of AND be less than or equal to the argument on the right. If it is not, those two arguments are automatically swapped, so that a nonempty range is always implied. + + + IS DISTINCT FROM + + + IS NOT DISTINCT FROM + + Ordinary comparison operators yield null (signifying unknown), + not true or false, when either input is null. For example, + 7 = NULL yields null, as does 7 <> NULL. When + this behavior is not suitable, use the + IS NOT DISTINCT FROM predicates: + +a IS DISTINCT FROM b +a IS NOT DISTINCT FROM b + + For non-null inputs, IS DISTINCT FROM is + the same as the <> operator. However, if both + inputs are null it returns false, and if only one input is + null it returns true. Similarly, IS NOT DISTINCT + FROM is identical to = for non-null + inputs, but it returns true when both inputs are null, and false when only + one input is null. Thus, these predicates effectively act as though null + were a normal data value, rather than unknown. + + IS NULL @@ -302,12 +427,12 @@ NOTNULL - To check whether a value is or is not null, use the constructs: + To check whether a value is or is not null, use the predicates: expression IS NULL expression IS NOT NULL - or the equivalent, but nonstandard, constructs: + or the equivalent, but nonstandard, predicates: expression ISNULL expression NOTNULL @@ -320,8 +445,7 @@ expression = NULL because NULL is not equal to NULL. (The null value represents an unknown value, - and it is not known whether two unknown values are equal.) This - behavior conforms to the SQL standard. + and it is not known whether two unknown values are equal.) @@ -338,7 +462,6 @@ - If the expression is row-valued, then IS NULL is true when the row expression itself is null @@ -346,39 +469,13 @@ IS NOT NULL is true when the row expression itself is non-null and all the row's fields are non-null. Because of this behavior, IS NULL and IS NOT NULL do not always return - inverse results for row-valued expressions, i.e., a row-valued - expression that contains both NULL and non-null values will return false - for both tests. - This definition conforms to the SQL standard, and is a change from the - inconsistent behavior exhibited by PostgreSQL - versions prior to 8.2. - - - - - - IS DISTINCT FROM - - - IS NOT DISTINCT FROM - - Ordinary comparison operators yield null (signifying unknown), - not true or false, when either input is null. For example, - 7 = NULL yields null, as does 7 <> NULL. When - this behavior is not suitable, use the - IS NOT DISTINCT FROM constructs: - -expression IS DISTINCT FROM expression -expression IS NOT DISTINCT FROM expression - - For non-null inputs, IS DISTINCT FROM is - the same as the <> operator. However, if both - inputs are null it returns false, and if only one input is - null it returns true. Similarly, IS NOT DISTINCT - FROM is identical to = for non-null - inputs, but it returns true when both inputs are null, and false when only - one input is null. Thus, these constructs effectively act as though null - were a normal data value, rather than unknown. + inverse results for row-valued expressions; in particular, a row-valued + expression that contains both null and non-null fields will return false + for both tests. In some cases, it may be preferable to + write row IS DISTINCT FROM NULL + or row IS NOT DISTINCT FROM NULL, + which will simply check whether the overall row value is null without any + additional tests on the row fields. @@ -400,14 +497,14 @@ IS NOT UNKNOWN - Boolean values can also be tested using the constructs + Boolean values can also be tested using the predicates -expression IS TRUE -expression IS NOT TRUE -expression IS FALSE -expression IS NOT FALSE -expression IS UNKNOWN -expression IS NOT UNKNOWN +boolean_expression IS TRUE +boolean_expression IS NOT TRUE +boolean_expression IS FALSE +boolean_expression IS NOT FALSE +boolean_expression IS UNKNOWN +boolean_expression IS NOT UNKNOWN These will always return true or false, never a null value, even when the operand is null. @@ -427,7 +524,7 @@ IS NOT OF It is possible to check the data type of an expression using the - constructs + predicates expression IS OF (typename, ...) expression IS NOT OF (typename, ...) @@ -461,7 +558,7 @@ num_nonnulls(VARIADIC "any") - returns the number of non-NULL arguments + returns the number of non-null arguments num_nonnulls(1, NULL, 2) 2 @@ -472,7 +569,7 @@ num_nulls(VARIADIC "any") - returns the number of NULL arguments + returns the number of null arguments num_nulls(1, NULL, 2) 1 diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c index d04d1a89a7f..2b9102125ef 100644 --- a/src/backend/executor/execQual.c +++ b/src/backend/executor/execQual.c @@ -3815,6 +3815,21 @@ ExecEvalNullTest(NullTestState *nstate, if (ntest->argisrow && !(*isNull)) { + /* + * The SQL standard defines IS [NOT] NULL for a non-null rowtype + * argument as: + * + * "R IS NULL" is true if every field is the null value. + * + * "R IS NOT NULL" is true if no field is the null value. + * + * This definition is (apparently intentionally) not recursive; so our + * tests on the fields are primitive attisnull tests, not recursive + * checks to see if they are all-nulls or no-nulls rowtypes. + * + * The standard does not consider the possibility of zero-field rows, + * but here we consider them to vacuously satisfy both predicates. + */ HeapTupleHeader tuple; Oid tupType; int32 tupTypmod; diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 1435f052fa8..a69af7cd7d2 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -3273,7 +3273,7 @@ eval_const_expressions_mutator(Node *node, arg = eval_const_expressions_mutator((Node *) ntest->arg, context); - if (arg && IsA(arg, RowExpr)) + if (ntest->argisrow && arg && IsA(arg, RowExpr)) { /* * We break ROW(...) IS [NOT] NULL into separate tests on @@ -3285,8 +3285,6 @@ eval_const_expressions_mutator(Node *node, List *newargs = NIL; ListCell *l; - Assert(ntest->argisrow); - foreach(l, rarg->args) { Node *relem = (Node *) lfirst(l); @@ -3305,10 +3303,17 @@ eval_const_expressions_mutator(Node *node, return makeBoolConst(false, false); continue; } + + /* + * Else, make a scalar (argisrow == false) NullTest + * for this field. Scalar semantics are required + * because IS [NOT] NULL doesn't recurse; see comments + * in ExecEvalNullTest(). + */ newntest = makeNode(NullTest); newntest->arg = (Expr *) relem; newntest->nulltesttype = ntest->nulltesttype; - newntest->argisrow = type_is_rowtype(exprType(relem)); + newntest->argisrow = false; newntest->location = ntest->location; newargs = lappend(newargs, newntest); } diff --git a/src/test/regress/expected/rowtypes.out b/src/test/regress/expected/rowtypes.out index 3630ef49438..2971640b4bd 100644 --- a/src/test/regress/expected/rowtypes.out +++ b/src/test/regress/expected/rowtypes.out @@ -657,3 +657,57 @@ select row_to_json(r) from (select q2,q1 from tt1 offset 0) r; {"q2":0,"q1":0} (3 rows) +-- +-- IS [NOT] NULL should not recurse into nested composites (bug #14235) +-- +explain (verbose, costs off) +select r, r is null as isnull, r is not null as isnotnull +from (values (1,row(1,2)), (1,row(null,null)), (1,null), + (null,row(1,2)), (null,row(null,null)), (null,null) ) r(a,b); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Values Scan on "*VALUES*" + Output: ROW("*VALUES*".column1, "*VALUES*".column2), (("*VALUES*".column1 IS NULL) AND ("*VALUES*".column2 IS NULL)), (("*VALUES*".column1 IS NOT NULL) AND ("*VALUES*".column2 IS NOT NULL)) +(2 rows) + +select r, r is null as isnull, r is not null as isnotnull +from (values (1,row(1,2)), (1,row(null,null)), (1,null), + (null,row(1,2)), (null,row(null,null)), (null,null) ) r(a,b); + r | isnull | isnotnull +-------------+--------+----------- + (1,"(1,2)") | f | t + (1,"(,)") | f | t + (1,) | f | f + (,"(1,2)") | f | f + (,"(,)") | f | f + (,) | t | f +(6 rows) + +explain (verbose, costs off) +with r(a,b) as + (values (1,row(1,2)), (1,row(null,null)), (1,null), + (null,row(1,2)), (null,row(null,null)), (null,null) ) +select r, r is null as isnull, r is not null as isnotnull from r; + QUERY PLAN +---------------------------------------------------------- + CTE Scan on r + Output: r.*, (r.* IS NULL), (r.* IS NOT NULL) + CTE r + -> Values Scan on "*VALUES*" + Output: "*VALUES*".column1, "*VALUES*".column2 +(5 rows) + +with r(a,b) as + (values (1,row(1,2)), (1,row(null,null)), (1,null), + (null,row(1,2)), (null,row(null,null)), (null,null) ) +select r, r is null as isnull, r is not null as isnotnull from r; + r | isnull | isnotnull +-------------+--------+----------- + (1,"(1,2)") | f | t + (1,"(,)") | f | t + (1,) | f | f + (,"(1,2)") | f | f + (,"(,)") | f | f + (,) | t | f +(6 rows) + diff --git a/src/test/regress/sql/rowtypes.sql b/src/test/regress/sql/rowtypes.sql index 677d34a62c8..a62dee2ef84 100644 --- a/src/test/regress/sql/rowtypes.sql +++ b/src/test/regress/sql/rowtypes.sql @@ -286,3 +286,27 @@ create temp table tt1 as select * from int8_tbl limit 2; create temp table tt2 () inherits(tt1); insert into tt2 values(0,0); select row_to_json(r) from (select q2,q1 from tt1 offset 0) r; + +-- +-- IS [NOT] NULL should not recurse into nested composites (bug #14235) +-- + +explain (verbose, costs off) +select r, r is null as isnull, r is not null as isnotnull +from (values (1,row(1,2)), (1,row(null,null)), (1,null), + (null,row(1,2)), (null,row(null,null)), (null,null) ) r(a,b); + +select r, r is null as isnull, r is not null as isnotnull +from (values (1,row(1,2)), (1,row(null,null)), (1,null), + (null,row(1,2)), (null,row(null,null)), (null,null) ) r(a,b); + +explain (verbose, costs off) +with r(a,b) as + (values (1,row(1,2)), (1,row(null,null)), (1,null), + (null,row(1,2)), (null,row(null,null)), (null,null) ) +select r, r is null as isnull, r is not null as isnotnull from r; + +with r(a,b) as + (values (1,row(1,2)), (1,row(null,null)), (1,null), + (null,row(1,2)), (null,row(null,null)), (null,null) ) +select r, r is null as isnull, r is not null as isnotnull from r; -- 2.39.5