Fix some corner-case issues in REFRESH MATERIALIZED VIEW CONCURRENTLY.
author: Tom Lane
Mon, 19 Mar 2018 22:49:53 +0000 (18:49 -0400)
committer: Tom Lane
Mon, 19 Mar 2018 22:49:53 +0000 (18:49 -0400)

refresh_by_match_merge() has some issues in the way it builds a SQL
query to construct the "diff" table:

1. It doesn't require the selected unique index(es) to be indimmediate.
2. It doesn't pay attention to the particular equality semantics enforced
by a given index, but just assumes that they must be those of the column
datatype's default btree opclass.
3. It doesn't check that the indexes are btrees.
4. It's insufficiently careful to ensure that the parser will pick the
intended operator when parsing the query.  (This would have been a
security bug before CVE-2018-1058.)
5. It's not careful about indexes on system columns.

The way to fix #4 is to make use of the existing code in ri_triggers.c
for generating an arbitrary binary operator clause.  I chose to move
that to ruleutils.c, since that seems a more reasonable place to be
exporting such functionality from than ri_triggers.c.

While #1, #3, and #5 are just latent given existing feature restrictions,
and #2 doesn't arise in the core system for lack of alternate opclasses
with different equality behaviors, #4 seems like an issue worth
back-patching.  That's the bulk of the change anyway, so just back-patch
the whole thing to 9.4 where this code was introduced.

Discussion: https://postgr.es/m/13836.1521413227@sss.pgh.pa.us

src/backend/commands/matview.c
src/backend/utils/adt/ri_triggers.c
src/backend/utils/adt/ruleutils.c
src/include/utils/builtins.h

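diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c
index 14ead4ce66f0a39e1b5d6eb1b17da7e9b4fbeb0c..37da47a4accf90d1268c7d149bcfdd24d2967915 100644
--- a/src/backend/commands/matview.c
+++ b/src/backend/commands/matview.c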
@@ -20,6 +20,8 @@
 #include "catalog/catalog.h"
 #include "catalog/indexing.h"
 #include "catalog/namespace.h"
+#include "catalog/pg_am.h"
+#include "catalog/pg_opclass.h"
 #include "catalog/pg_operator.h"
 #include "commands/cluster.h"
 #include "commands/matview.h"
@@ -38,7 +40,6 @@
 #include "utils/rel.h"
 #include "utils/snapmgr.h"
 #include "utils/syscache.h"
-#include "utils/typcache.h"
 
 
 typedef struct
@@ -60,14 +61,11 @@ static void transientrel_shutdown(DestReceiver *self);
 static void transientrel_destroy(DestReceiver *self);
 static void refresh_matview_datafill(DestReceiver *dest, Query *query,
                         const char *queryString);
-
 static char *make_temptable_name_n(char *tempname, int n);
-static void mv_GenerateOper(StringInfo buf, Oid opoid);
-
 static void refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner,
                         int save_sec_context);
 static void refresh_by_heap_swap(Oid matviewOid, Oid OIDNewHeap);
-
+static bool is_usable_unique_index(Relation indexRel);
 static void OpenMatViewIncrementalMaintenance(void);
 static void CloseMatViewIncrementalMaintenance(void);
 
@@ -477,25 +475,6 @@ make_temptable_name_n(char *tempname, int n)
    return namebuf.data;
 }
 
-static void
-mv_GenerateOper(StringInfo buf, Oid opoid)
-{
-   HeapTuple   opertup;
-   Form_pg_operator operform;
-
-   opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(opoid));
-   if (!HeapTupleIsValid(opertup))
-       elog(ERROR, "cache lookup failed for operator %u", opoid);
-   operform = (Form_pg_operator) GETSTRUCT(opertup);
-   Assert(operform->oprkind == 'b');
-
-   appendStringInfo(buf, "OPERATOR(%s.%s)",
-               quote_identifier(get_namespace_name(operform->oprnamespace)),
-                    NameStr(operform->oprname));
-
-   ReleaseSysCache(opertup);
-}
-
 /*
  * refresh_by_match_merge
  *
@@ -543,7 +522,7 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner,
    List       *indexoidlist;
    ListCell   *indexoidscan;
    int16       relnatts;
-   bool       *usedForQual;
+   Oid        *opUsedForQual;
 
    initStringInfo(&querybuf);
    matviewRel = heap_open(matviewOid, NoLock);
@@ -555,7 +534,6 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner,
    diffname = make_temptable_name_n(tempname, 2);
 
    relnatts = matviewRel->rd_rel->relnatts;
-   usedForQual = (bool *) palloc0(sizeof(bool) * relnatts);
 
    /* Open SPI context. */
    if (SPI_connect() != SPI_OK_CONNECT)
@@ -619,45 +597,82 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner,
     * include all rows.
     */
    tupdesc = matviewRel->rd_att;
+   opUsedForQual = (Oid *) palloc0(sizeof(Oid) * relnatts);
    foundUniqueIndex = false;
+
    indexoidlist = RelationGetIndexList(matviewRel);
 
    foreach(indexoidscan, indexoidlist)
    {
        Oid         indexoid = lfirst_oid(indexoidscan);
        Relation    indexRel;
-       Form_pg_index indexStruct;
 
        indexRel = index_open(indexoid, RowExclusiveLock);
-       indexStruct = indexRel->rd_index;
-
-       /*
-        * We're only interested if it is unique, valid, contains no
-        * expressions, and is not partial.
-        */
-       if (indexStruct->indisunique &&
-           IndexIsValid(indexStruct) &&
-           RelationGetIndexExpressions(indexRel) == NIL &&
-           RelationGetIndexPredicate(indexRel) == NIL)
+       if (is_usable_unique_index(indexRel))
        {
+           Form_pg_index indexStruct = indexRel->rd_index;
            int         numatts = indexStruct->indnatts;
+           oidvector  *indclass;
+           Datum       indclassDatum;
+           bool        isnull;
            int         i;
 
+           /* Must get indclass the hard way. */
+           indclassDatum = SysCacheGetAttr(INDEXRELID,
+                                           indexRel->rd_indextuple,
+                                           Anum_pg_index_indclass,
+                                           &isnull);
+           Assert(!isnull);
+           indclass = (oidvector *) DatumGetPointer(indclassDatum);
+
            /* Add quals for all columns from this index. */
            for (i = 0; i < numatts; i++)
            {
                int         attnum = indexStruct->indkey.values[i];
-               Oid         type;
+               Oid         opclass = indclass->values[i];
+               Form_pg_attribute attr = TupleDescAttr(tupdesc, attnum - 1);
+               Oid         attrtype = attr->atttypid;
+               HeapTuple   cla_ht;
+               Form_pg_opclass cla_tup;
+               Oid         opfamily;
+               Oid         opcintype;
                Oid         op;
-               const char *colname;
+               const char *leftop;
+               const char *rightop;
+
+               /*
+                * Identify the equality operator associated with this index
+                * column.  First we need to look up the column's opclass.
+                */
+               cla_ht = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclass));
+               if (!HeapTupleIsValid(cla_ht))
+                   elog(ERROR, "cache lookup failed for opclass %u", opclass);
+               cla_tup = (Form_pg_opclass) GETSTRUCT(cla_ht);
+               Assert(cla_tup->opcmethod == BTREE_AM_OID);
+               opfamily = cla_tup->opcfamily;
+               opcintype = cla_tup->opcintype;
+               ReleaseSysCache(cla_ht);
+
+               op = get_opfamily_member(opfamily, opcintype, opcintype,
+                                        BTEqualStrategyNumber);
+               if (!OidIsValid(op))
+                   elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
+                        BTEqualStrategyNumber, opcintype, opcintype, opfamily);
 
                /*
-                * Only include the column once regardless of how many times
-                * it shows up in how many indexes.
+                * If we find the same column with the same equality semantics
+                * in more than one index, we only need to emit the equality
+                * clause once.
+                *
+                * Since we only remember the last equality operator, this
+                * code could be fooled into emitting duplicate clauses given
+                * multiple indexes with several different opclasses ... but
+                * that's so unlikely it doesn't seem worth spending extra
+                * code to avoid.
                 */
-               if (usedForQual[attnum - 1])
+               if (opUsedForQual[attnum - 1] == op)
                    continue;
-               usedForQual[attnum - 1] = true;
+               opUsedForQual[attnum - 1] = op;
 
                /*
                 * Actually add the qual, ANDed with any others.
@@ -665,12 +680,15 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner,
                if (foundUniqueIndex)
                    appendStringInfoString(&querybuf, " AND ");
 
-               colname = quote_identifier(NameStr((tupdesc->attrs[attnum - 1])->attname));
-               appendStringInfo(&querybuf, "newdata.%s ", colname);
-               type = attnumTypeId(matviewRel, attnum);
-               op = lookup_type_cache(type, TYPECACHE_EQ_OPR)->eq_opr;
-               mv_GenerateOper(&querybuf, op);
-               appendStringInfo(&querybuf, " mv.%s", colname);
+               leftop = quote_qualified_identifier("newdata",
+                                                   NameStr(attr->attname));
+               rightop = quote_qualified_identifier("mv",
+                                                    NameStr(attr->attname));
+
+               generate_operator_clause(&querybuf,
+                                        leftop, attrtype,
+                                        op,
+                                        rightop, attrtype);
 
                foundUniqueIndex = true;
            }
@@ -762,6 +780,51 @@ refresh_by_heap_swap(Oid matviewOid, Oid OIDNewHeap)
                     RecentXmin, ReadNextMultiXactId());
 }
 
+/*
+ * Check whether specified index is usable for match merge.
+ */
+static bool
+is_usable_unique_index(Relation indexRel)
+{
+   Form_pg_index indexStruct = indexRel->rd_index;
+
+   /*
+    * Must be unique, valid, immediate, non-partial, and be defined over
+    * plain user columns (not expressions).  We also require it to be a
+    * btree.  Even if we had any other unique index kinds, we'd not know how
+    * to identify the corresponding equality operator, nor could we be sure
+    * that the planner could implement the required FULL JOIN with non-btree
+    * operators.
+    */
+   if (indexStruct->indisunique &&
+       indexStruct->indimmediate &&
+       indexRel->rd_rel->relam == BTREE_AM_OID &&
+       IndexIsValid(indexStruct) &&
+       RelationGetIndexPredicate(indexRel) == NIL &&
+       indexStruct->indnatts > 0)
+   {
+       /*
+        * The point of groveling through the index columns individually is to
+        * reject both index expressions and system columns.  Currently,
+        * matviews couldn't have OID columns so there's no way to create an
+        * index on a system column; but maybe someday that wouldn't be true,
+        * so let's be safe.
+        */
+       int         numatts = indexStruct->indnatts;
+       int         i;
+
+       for (i = 0; i < numatts; i++)
+       {
+           int         attnum = indexStruct->indkey.values[i];
+
+           if (attnum <= 0)
+               return false;
+       }
+       return true;
+   }
+   return false;
+}
+
 
 /*
  * This should be used to test whether the backend is in a context where it is
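diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c
index 2f225f881896895e4ceaa0c73e28734c358e74a1..35abb7f17d2e25ba0a8b8ae3ac64b587f4acd46d 100644
--- a/src/backend/utils/adt/ri_triggers.c
+++ b/src/backend/utils/adt/ri_triggers.c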
@@ -207,7 +207,6 @@ static void ri_GenerateQual(StringInfo buf,
                const char *leftop, Oid leftoptype,
                Oid opoid,
                const char *rightop, Oid rightoptype);
-static void ri_add_cast_to(StringInfo buf, Oid typid);
 static void ri_GenerateQualCollation(StringInfo buf, Oid collation);
 static int ri_NullCheck(HeapTuple tup,
             const RI_ConstraintInfo *riinfo, bool rel_is_pk);
@@ -2547,13 +2546,10 @@ quoteRelationName(char *buffer, Relation rel)
 /*
  * ri_GenerateQual --- generate a WHERE clause equating two variables
  *
- * The idea is to append " sep leftop op rightop" to buf.  The complexity
- * comes from needing to be sure that the parser will select the desired
- * operator.  We always name the operator using OPERATOR(schema.op) syntax
- * (readability isn't a big priority here), so as to avoid search-path
- * uncertainties.  We have to emit casts too, if either input isn't already
- * the input type of the operator; else we are at the mercy of the parser's
- * heuristics for ambiguous-operator resolution.
+ * This basically appends " sep leftop op rightop" to buf, adding casts
+ * and schema qualification as needed to ensure that the parser will select
+ * the operator we specify.  leftop and rightop should be parenthesized
+ * if they aren't variables or parameters.
  */
 static void
 ri_GenerateQual(StringInfo buf,
@@ -2562,60 +2558,9 @@ ri_GenerateQual(StringInfo buf,
                Oid opoid,
                const char *rightop, Oid rightoptype)
 {
-   HeapTuple   opertup;
-   Form_pg_operator operform;
-   char       *oprname;
-   char       *nspname;
-
-   opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(opoid));
-   if (!HeapTupleIsValid(opertup))
-       elog(ERROR, "cache lookup failed for operator %u", opoid);
-   operform = (Form_pg_operator) GETSTRUCT(opertup);
-   Assert(operform->oprkind == 'b');
-   oprname = NameStr(operform->oprname);
-
-   nspname = get_namespace_name(operform->oprnamespace);
-
-   appendStringInfo(buf, " %s %s", sep, leftop);
-   if (leftoptype != operform->oprleft)
-       ri_add_cast_to(buf, operform->oprleft);
-   appendStringInfo(buf, " OPERATOR(%s.", quote_identifier(nspname));
-   appendStringInfoString(buf, oprname);
-   appendStringInfo(buf, ") %s", rightop);
-   if (rightoptype != operform->oprright)
-       ri_add_cast_to(buf, operform->oprright);
-
-   ReleaseSysCache(opertup);
-}
-
-/*
- * Add a cast specification to buf.  We spell out the type name the hard way,
- * intentionally not using format_type_be().  This is to avoid corner cases
- * for CHARACTER, BIT, and perhaps other types, where specifying the type
- * using SQL-standard syntax results in undesirable data truncation.  By
- * doing it this way we can be certain that the cast will have default (-1)
- * target typmod.
- */
-static void
-ri_add_cast_to(StringInfo buf, Oid typid)
-{
-   HeapTuple   typetup;
-   Form_pg_type typform;
-   char       *typname;
-   char       *nspname;
-
-   typetup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid));
-   if (!HeapTupleIsValid(typetup))
-       elog(ERROR, "cache lookup failed for type %u", typid);
-   typform = (Form_pg_type) GETSTRUCT(typetup);
-
-   typname = NameStr(typform->typname);
-   nspname = get_namespace_name(typform->typnamespace);
-
-   appendStringInfo(buf, "::%s.%s",
-                    quote_identifier(nspname), quote_identifier(typname));
-
-   ReleaseSysCache(typetup);
+   appendStringInfo(buf, " %s ", sep);
+   generate_operator_clause(buf, leftop, leftoptype, opoid,
+                            rightop, rightoptype);
 }
 
 /*
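diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index a3c5ff418ffef838abc062fbbf86f7ca756c3978..d161c5c2526f0ca3ac9462bca7a7c33f42ea8beb 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c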
@@ -430,6 +430,7 @@ static char *generate_function_name(Oid funcid, int nargs,
                       List *argnames, Oid *argtypes,
                       bool has_variadic, bool *use_variadic_p);
 static char *generate_operator_name(Oid operid, Oid arg1, Oid arg2);
+static void add_cast_to(StringInfo buf, Oid typid);
 static text *string_to_text(char *str);
 static char *flatten_reloptions(Oid relid);
 
@@ -9504,6 +9505,85 @@ generate_operator_name(Oid operid, Oid arg1, Oid arg2)
    return buf.data;
 }
 
+/*
+ * generate_operator_clause --- generate a binary-operator WHERE clause
+ *
+ * This is used for internally-generated-and-executed SQL queries, where
+ * precision is essential and readability is secondary.  The basic
+ * requirement is to append "leftop op rightop" to buf, where leftop and
+ * rightop are given as strings and are assumed to yield types leftoptype
+ * and rightoptype; the operator is identified by OID.  The complexity
+ * comes from needing to be sure that the parser will select the desired
+ * operator when the query is parsed.  We always name the operator using
+ * OPERATOR(schema.op) syntax, so as to avoid search-path uncertainties.
+ * We have to emit casts too, if either input isn't already the input type
+ * of the operator; else we are at the mercy of the parser's heuristics for
+ * ambiguous-operator resolution.  The caller must ensure that leftop and
+ * rightop are suitable arguments for a cast operation; it's best to insert
+ * parentheses if they aren't just variables or parameters.
+ */
+void
+generate_operator_clause(StringInfo buf,
+                        const char *leftop, Oid leftoptype,
+                        Oid opoid,
+                        const char *rightop, Oid rightoptype)
+{
+   HeapTuple   opertup;
+   Form_pg_operator operform;
+   char       *oprname;
+   char       *nspname;
+
+   opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(opoid));
+   if (!HeapTupleIsValid(opertup))
+       elog(ERROR, "cache lookup failed for operator %u", opoid);
+   operform = (Form_pg_operator) GETSTRUCT(opertup);
+   Assert(operform->oprkind == 'b');
+   oprname = NameStr(operform->oprname);
+
+   nspname = get_namespace_name(operform->oprnamespace);
+
+   appendStringInfoString(buf, leftop);
+   if (leftoptype != operform->oprleft)
+       add_cast_to(buf, operform->oprleft);
+   appendStringInfo(buf, " OPERATOR(%s.", quote_identifier(nspname));
+   appendStringInfoString(buf, oprname);
+   appendStringInfo(buf, ") %s", rightop);
+   if (rightoptype != operform->oprright)
+       add_cast_to(buf, operform->oprright);
+
+   ReleaseSysCache(opertup);
+}
+
+/*
+ * Add a cast specification to buf.  We spell out the type name the hard way,
+ * intentionally not using format_type_be().  This is to avoid corner cases
+ * for CHARACTER, BIT, and perhaps other types, where specifying the type
+ * using SQL-standard syntax results in undesirable data truncation.  By
+ * doing it this way we can be certain that the cast will have default (-1)
+ * target typmod.
+ */
+static void
+add_cast_to(StringInfo buf, Oid typid)
+{
+   HeapTuple   typetup;
+   Form_pg_type typform;
+   char       *typname;
+   char       *nspname;
+
+   typetup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid));
+   if (!HeapTupleIsValid(typetup))
+       elog(ERROR, "cache lookup failed for type %u", typid);
+   typform = (Form_pg_type) GETSTRUCT(typetup);
+
+   typname = NameStr(typform->typname);
+   nspname = get_namespace_name(typform->typnamespace);
+
+   appendStringInfo(buf, "::%s.%s",
+                    quote_identifier(nspname), quote_identifier(typname));
+
+   ReleaseSysCache(typetup);
+}
+
 /*
  * generate_collation_name
  *     Compute the name to display for a collation specified by OID
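diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 971b707d58c6606cf7ec3ff71522ce2d13bf624b..92d36e57f54d99ceedaeb05622bda49c0a624278 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h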
@@ -704,6 +704,10 @@ extern List *select_rtable_names_for_explain(List *rtable,
 extern const char *quote_identifier(const char *ident);
 extern char *quote_qualified_identifier(const char *qualifier,
                           const char *ident);
+extern void generate_operator_clause(fmStringInfo buf,
+                        const char *leftop, Oid leftoptype,
+                        Oid opoid,
+                        const char *rightop, Oid rightoptype);
 extern char *generate_collation_name(Oid collid);