Improve hash join to discard input tuples immediately if they can't
author Tom Lane
Sun, 28 Jan 2007 23:21:26 +0000 (23:21 +0000)
committer Tom Lane
Sun, 28 Jan 2007 23:21:26 +0000 (23:21 +0000)
match because they contain a null join key (and the join operator is
known strict).  Improves performance significantly when the inner
relation contains a lot of nulls, as per bug #2930.

src/backend/executor/nodeHash.c
src/backend/executor/nodeHashjoin.c
src/include/executor/hashjoin.h
src/include/executor/nodeHash.h

index de64e28293df07b538d5dde1d7f2ebc97a4050c0..dffe8cb0d30bd786eb5a5d169e3c412d5e5685f3 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.108 2007/01/05 22:19:28 momjian Exp $
+ *   $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.109 2007/01/28 23:21:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -92,11 +92,14 @@ MultiExecHash(HashState *node)
        slot = ExecProcNode(outerNode);
        if (TupIsNull(slot))
            break;
-       hashtable->totalTuples += 1;
        /* We have to compute the hash value */
        econtext->ecxt_innertuple = slot;
-       hashvalue = ExecHashGetHashValue(hashtable, econtext, hashkeys);
-       ExecHashTableInsert(hashtable, slot, hashvalue);
+       if (ExecHashGetHashValue(hashtable, econtext, hashkeys, false,
+                                &hashvalue))
+       {
+           ExecHashTableInsert(hashtable, slot, hashvalue);
+           hashtable->totalTuples += 1;
+       }
    }
 
    /* must provide our own instrumentation support */
@@ -261,19 +264,23 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
 
    /*
     * Get info about the hash functions to be used for each hash key.
+    * Also remember whether the join operators are strict.
     */
    nkeys = list_length(hashOperators);
    hashtable->hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
+   hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool));
    i = 0;
    foreach(ho, hashOperators)
    {
+       Oid         hashop = lfirst_oid(ho);
        Oid         hashfn;
 
-       hashfn = get_op_hash_function(lfirst_oid(ho));
+       hashfn = get_op_hash_function(hashop);
        if (!OidIsValid(hashfn))
            elog(ERROR, "could not find hash function for hash operator %u",
-                lfirst_oid(ho));
+                hashop);
        fmgr_info(hashfn, &hashtable->hashfunctions[i]);
+       hashtable->hashStrict[i] = op_strict(hashop);
        i++;
    }
 
@@ -657,11 +664,18 @@ ExecHashTableInsert(HashJoinTable hashtable,
  * The tuple to be tested must be in either econtext->ecxt_outertuple or
  * econtext->ecxt_innertuple.  Vars in the hashkeys expressions reference
  * either OUTER or INNER.
+ *
+ * A TRUE result means the tuple's hash value has been successfully computed
+ * and stored at *hashvalue.  A FALSE result means the tuple cannot match
+ * because it contains a null attribute, and hence it should be discarded
+ * immediately.  (If keep_nulls is true then FALSE is never returned.)
  */
-uint32
+bool
 ExecHashGetHashValue(HashJoinTable hashtable,
                     ExprContext *econtext,
-                    List *hashkeys)
+                    List *hashkeys,
+                    bool keep_nulls,
+                    uint32 *hashvalue)
 {
    uint32      hashkey = 0;
    ListCell   *hk;
@@ -691,10 +705,27 @@ ExecHashGetHashValue(HashJoinTable hashtable,
        keyval = ExecEvalExpr(keyexpr, econtext, &isNull, NULL);
 
        /*
-        * Compute the hash function
+        * If the attribute is NULL, and the join operator is strict, then
+        * this tuple cannot pass the join qual so we can reject it
+        * immediately (unless we're scanning the outside of an outer join,
+        * in which case we must not reject it).  Otherwise we act like the
+        * hashcode of NULL is zero (this will support operators that act like
+        * IS NOT DISTINCT, though not any more-random behavior).  We treat
+        * the hash support function as strict even if the operator is not.
+        *
+        * Note: currently, all hashjoinable operators must be strict since
+        * the hash index AM assumes that.  However, it takes so little
+        * extra code here to allow non-strict that we may as well do it.
         */
-       if (!isNull)            /* treat nulls as having hash key 0 */
+       if (isNull)
+       {
+           if (hashtable->hashStrict[i] && !keep_nulls)
+               return false;   /* cannot match */
+           /* else, leave hashkey unmodified, equivalent to hashcode 0 */
+       }
+       else
        {
+           /* Compute the hash function */
            uint32      hkey;
 
            hkey = DatumGetUInt32(FunctionCall1(&hashtable->hashfunctions[i],
@@ -707,7 +738,8 @@ ExecHashGetHashValue(HashJoinTable hashtable,
 
    MemoryContextSwitchTo(oldContext);
 
-   return hashkey;
+   *hashvalue = hashkey;
+   return true;
 }
 
 /*
index 7f0801c69bdb54e9bdef973187c0a61e6bbc169b..b03086fb364a17356873cdb2652ce47bce506fd8 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.86 2007/01/05 22:19:28 momjian Exp $
+ *   $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.87 2007/01/28 23:21:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -547,9 +547,8 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
    int         curbatch = hashtable->curbatch;
    TupleTableSlot *slot;
 
-   if (curbatch == 0)
-   {                           /* if it is the first pass */
-
+   if (curbatch == 0)          /* if it is the first pass */
+   {
        /*
         * Check to see if first outer tuple was already fetched by
         * ExecHashJoin() and not used yet.
@@ -559,7 +558,8 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
            hjstate->hj_FirstOuterTupleSlot = NULL;
        else
            slot = ExecProcNode(outerNode);
-       if (!TupIsNull(slot))
+
+       while (!TupIsNull(slot))
        {
            /*
             * We have to compute the tuple's hash value.
@@ -567,13 +567,22 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
            ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
 
            econtext->ecxt_outertuple = slot;
-           *hashvalue = ExecHashGetHashValue(hashtable, econtext,
-                                             hjstate->hj_OuterHashKeys);
+           if (ExecHashGetHashValue(hashtable, econtext,
+                                    hjstate->hj_OuterHashKeys,
+                                    (hjstate->js.jointype == JOIN_LEFT),
+                                    hashvalue))
+           {
+               /* remember outer relation is not empty for possible rescan */
+               hjstate->hj_OuterNotEmpty = true;
 
-           /* remember outer relation is not empty for possible rescan */
-           hjstate->hj_OuterNotEmpty = true;
+               return slot;
+           }
 
-           return slot;
+           /*
+            * That tuple couldn't match because of a NULL, so discard it
+            * and continue with the next one.
+            */
+           slot = ExecProcNode(outerNode);
        }
 
        /*
index 59ebb6ebbd98074b1f4813c86ad39f87bf08f885..ba08640767998ca1a11c2682d8e5d9ac820cb37a 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.42 2007/01/05 22:19:54 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.43 2007/01/28 23:21:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -108,6 +108,8 @@ typedef struct HashJoinTableData
     */
    FmgrInfo   *hashfunctions;  /* lookup data for hash functions */
 
+   bool       *hashStrict;     /* is each hash join operator strict? */
+
    Size        spaceUsed;      /* memory space currently used by tuples */
    Size        spaceAllowed;   /* upper limit for space used */
 
index 0ed53ec2267153f25063e353c551de758b9df3b8..bf7292e8156e89233abb892a0aa23ffe50ebdb21 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.42 2007/01/05 22:19:54 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.43 2007/01/28 23:21:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -28,9 +28,11 @@ extern void ExecHashTableDestroy(HashJoinTable hashtable);
 extern void ExecHashTableInsert(HashJoinTable hashtable,
                    TupleTableSlot *slot,
                    uint32 hashvalue);
-extern uint32 ExecHashGetHashValue(HashJoinTable hashtable,
+extern bool ExecHashGetHashValue(HashJoinTable hashtable,
                     ExprContext *econtext,
-                    List *hashkeys);
+                    List *hashkeys,
+                    bool keep_nulls,
+                    uint32 *hashvalue);
 extern void ExecHashGetBucketAndBatch(HashJoinTable hashtable,
                          uint32 hashvalue,
                          int *bucketno,