Repair some problems in GIST-index contrib modules. Patch from

author Tom Lane

Thu, 7 Feb 2002 22:11:43 +0000 (22:11 +0000)

committer Tom Lane

Thu, 7 Feb 2002 22:11:43 +0000 (22:11 +0000)
author Tom Lane
Thu, 7 Feb 2002 22:11:43 +0000 (22:11 +0000)
committer Tom Lane
Thu, 7 Feb 2002 22:11:43 +0000 (22:11 +0000)
diff --git a/contrib/intarray/_int.c b/contrib/intarray/_int.c

index a642998cd444003040af75884865a8bc06e5f0dc..35ee7a659e2328c6614590e784069a6df415e0b0 100644 (file)
--- a/contrib/intarray/_int.c
+++ b/contrib/intarray/_int.c
@@ -1457,6 +1457,10 @@ _int_common_picksplit(bytea *entryvec,
     v->spl_nleft = 0;
     right = v->spl_right;
     v->spl_nright = 0;
+   if ( seed_1 == 0 || seed_2 == 0 ) {
+       seed_1 = 1;
+       seed_2 = 2;
+   }
  
     datum_alpha = (ArrayType *) DatumGetPointer(((GISTENTRY *) VARDATA(entryvec))[seed_1].key);
     datum_l = copy_intArrayType(datum_alpha);
diff --git a/contrib/tsearch/README.tsearch b/contrib/tsearch/README.tsearch

index 96059893fa693bf27b9b919a8cde99f19c1f4422..c63ae91edd096cfbf4646e565f4cd1c41b08e85e 100644 (file)
--- a/contrib/tsearch/README.tsearch
+++ b/contrib/tsearch/README.tsearch
@@ -198,23 +198,6 @@ Don't forget to do
    make clean; make; make install
  
  2.
-As it was mentioned above we don't use explicitly ID of lexems
-as in OpenFTS but use hash function (crc32) instead to map lexem to
-integer. Our experiments show that probability of collision is quite small:
-for english text it's about 10**(-6) and 10**(-5) for russian collection.
-Default installation doesn't check for collisions but if your application
-does need to guarantee an exact (no collisions) search, you need 
-to update system table to mark index islossy:
-
-  update pg_amop set amopreqcheck = true where amopclaid = 
-      (select oid from pg_opclass where opcname = 'gist_txtidx_ops');
-
-If you don't bother about collisions :
-
-  update pg_amop set amopreqcheck = false where amopclaid = 
-      (select oid from pg_opclass where opcname = 'gist_txtidx_ops');
-
-3.
  txtidx doesn't preserve words ordering (this is not critical for searching)
  for performance reason, for example:
  
@@ -224,7 +207,7 @@ test=# select 'page two'::txtidx;
   'two' 'page'
  (1 row)
  
-4. 
+3. 
  Indexed access provided by txtidx data type isn't always good
  because of internal data structure we use (RD-Tree). Particularly,
  queries like '!gist' will be  slower than just a sequential scan,
@@ -265,7 +248,7 @@ test=# select querytree( '!gist'::query_txt );
  These two queries will be processed by scanning of full index !
  Very slow !
  
-5.
+4.
  Following selects produce the same result
  
    select title from titles where titleidx @@ 'patch&gist';
diff --git a/contrib/tsearch/gistidx.c b/contrib/tsearch/gistidx.c

index 376b21a307d0c10df95b1069aaa30935c46d1ac8..5e527036840c49c1bb0b86617952727383fd3036 100644 (file)
--- a/contrib/tsearch/gistidx.c
+++ b/contrib/tsearch/gistidx.c
@@ -10,6 +10,7 @@
  #include "utils/array.h"
  #include "utils/builtins.h"
  #include "storage/bufpage.h"
+#include "access/tuptoaster.h"
  
  #include "txtidx.h"
  #include "query.h"
@@ -86,6 +87,15 @@ uniqueint( int4* a, int4 l ) {
     return res + 1 - a;
  }
  
+static void
+makesign( BITVECP sign, GISTTYPE *a) {
+   int4 k,len = ARRNELEM( a );
+   int4 *ptr = GETARR( a );
+   MemSet( (void*)sign, 0, sizeof(BITVEC) );
+   for(k=0;k<len;k++)
+       HASH( sign, ptr[k] );
+}
+
 Datum
 gtxtidx_compress(PG_FUNCTION_ARGS) {
    GISTENTRY *entry = (GISTENTRY *)PG_GETARG_POINTER(0);
@@ -110,8 +120,6 @@ gtxtidx_compress(PG_FUNCTION_ARGS) {
            *arr = crc32_sz( (uint8*)&words[ ptr->pos ], ptr->len );
            arr++; ptr++; 
        }
-       if ( val != toastedval )
-           pfree(val);
 
        len = uniqueint( GETARR(res), val->size );
        if ( len != val->size ) { 
@@ -120,7 +128,22 @@ gtxtidx_compress(PG_FUNCTION_ARGS) {
            len = CALCGTSIZE( ARRKEY, len );
            res = (GISTTYPE*)repalloc( (void*)res, len );
            res->len = len;
-       }   
+       }
+       if ( val != toastedval )
+           pfree(val);
+
+       /* make signature, if array is too long */
+       if ( res->len > TOAST_INDEX_TARGET ) {
+           GISTTYPE *ressign;
+
+           len = CALCGTSIZE( SIGNKEY, 0 );
+           ressign = (GISTTYPE*)palloc( len );
+           ressign->len = len;
+           ressign->flag = SIGNKEY;
+           makesign( GETSIGN(ressign), res );
+           pfree(res);
+           res = ressign;
+       }
        
        retval = (GISTENTRY*)palloc(sizeof(GISTENTRY));
        gistentryinit(*retval, PointerGetDatum(res), 
@@ -379,15 +402,6 @@ gtxtidx_penalty(PG_FUNCTION_ARGS) {
    PG_RETURN_POINTER( penalty );
 }
 
-static void
-makesign( BITVECP sign, GISTTYPE *a) {
-   int4 k,len = ARRNELEM( a );
-   int4 *ptr = GETARR( a );
-   MemSet( (void*)sign, 0, sizeof(BITVEC) );
-   for(k=0;k<len;k++)
-       HASH( sign, ptr[k] );
-}
-
 typedef struct {
    bool    allistrue;
    BITVEC  sign;
@@ -503,6 +517,11 @@ gtxtidx_picksplit(PG_FUNCTION_ARGS) {
    right = v->spl_right;
    v->spl_nright = 0;
 
+   if ( seed_1 == 0 || seed_2 == 0 ) {
+       seed_1 = 1;
+       seed_2 = 2;
+   }
+
    /* form initial .. */ 
    if ( cache[seed_1].allistrue ) {
        datum_l = (GISTTYPE*)palloc( CALCGTSIZE( SIGNKEY|ALLISTRUE, 0 ) );


diff --git a/contrib/tsearch/tsearch.sql.in b/contrib/tsearch/tsearch.sql.in

index 17317f255842f76038eedaaf549ad22925394db4..48a5ae27e62183027b6db5447141bafc22f8996c 100644 (file)


--- a/contrib/tsearch/tsearch.sql.in
+++ b/contrib/tsearch/tsearch.sql.in
@@ -171,7 +171,7 @@ WHERE o.oprleft = t.oid and o.oprright=tq.oid
    and ( tq.typname='query_txt' or tq.typname='mquery_txt' );
 
 INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr)
-   SELECT opcl.oid, 1, false, c.opoid
+   SELECT opcl.oid, 1, true, c.opoid
    FROM pg_opclass opcl, txtidx_ops_tmp c
    WHERE
       opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist')
@@ -179,7 +179,7 @@ INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr)
       and c.oprname = '@@';
 
 INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr)
-   SELECT opcl.oid, 2, false, c.opoid
+   SELECT opcl.oid, 2, true, c.opoid
    FROM pg_opclass opcl, txtidx_ops_tmp c
    WHERE
       opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist')
+       HASH( sign, ptr[k] );
+}
+
  Datum
  gtxtidx_compress(PG_FUNCTION_ARGS) {
     GISTENTRY *entry = (GISTENTRY *)PG_GETARG_POINTER(0);
@@ -110,8 +120,6 @@ gtxtidx_compress(PG_FUNCTION_ARGS) {
             *arr = crc32_sz( (uint8*)&words[ ptr->pos ], ptr->len );
             arr++; ptr++; 
         }
-       if ( val != toastedval )
-           pfree(val);
  
         len = uniqueint( GETARR(res), val->size );
         if ( len != val->size ) { 
@@ -120,7 +128,22 @@ gtxtidx_compress(PG_FUNCTION_ARGS) {
             len = CALCGTSIZE( ARRKEY, len );
             res = (GISTTYPE*)repalloc( (void*)res, len );
             res->len = len;
-       }   
+       }
+       if ( val != toastedval )
+           pfree(val);
+
+       /* make signature, if array is too long */
+       if ( res->len > TOAST_INDEX_TARGET ) {
+           GISTTYPE *ressign;
+
+           len = CALCGTSIZE( SIGNKEY, 0 );
+           ressign = (GISTTYPE*)palloc( len );
+           ressign->len = len;
+           ressign->flag = SIGNKEY;
+           makesign( GETSIGN(ressign), res );
+           pfree(res);
+           res = ressign;
+       }
         
         retval = (GISTENTRY*)palloc(sizeof(GISTENTRY));
         gistentryinit(*retval, PointerGetDatum(res), 
@@ -379,15 +402,6 @@ gtxtidx_penalty(PG_FUNCTION_ARGS) {
     PG_RETURN_POINTER( penalty );
  }
  
-static void
-makesign( BITVECP sign, GISTTYPE *a) {
-   int4 k,len = ARRNELEM( a );
-   int4 *ptr = GETARR( a );
-   MemSet( (void*)sign, 0, sizeof(BITVEC) );
-   for(k=0;k<len;k++)
-       HASH( sign, ptr[k] );
-}
-
 typedef struct {
    bool    allistrue;
    BITVEC  sign;
@@ -503,6 +517,11 @@ gtxtidx_picksplit(PG_FUNCTION_ARGS) {
    right = v->spl_right;
    v->spl_nright = 0;
 
+   if ( seed_1 == 0 || seed_2 == 0 ) {
+       seed_1 = 1;
+       seed_2 = 2;
+   }
+
    /* form initial .. */ 
    if ( cache[seed_1].allistrue ) {
        datum_l = (GISTTYPE*)palloc( CALCGTSIZE( SIGNKEY|ALLISTRUE, 0 ) );
-       HASH( sign, ptr[k] );
-}
-
  typedef struct {
     bool    allistrue;
     BITVEC  sign;
@@ -503,6 +517,11 @@ gtxtidx_picksplit(PG_FUNCTION_ARGS) {
     right = v->spl_right;
     v->spl_nright = 0;
  
+   if ( seed_1 == 0 || seed_2 == 0 ) {
+       seed_1 = 1;
+       seed_2 = 2;
+   }
+
     /* form initial .. */ 
     if ( cache[seed_1].allistrue ) {
         datum_l = (GISTTYPE*)palloc( CALCGTSIZE( SIGNKEY|ALLISTRUE, 0 ) );
diff --git a/contrib/tsearch/tsearch.sql.in b/contrib/tsearch/tsearch.sql.in

index 17317f255842f76038eedaaf549ad22925394db4..48a5ae27e62183027b6db5447141bafc22f8996c 100644 (file)
--- a/contrib/tsearch/tsearch.sql.in
+++ b/contrib/tsearch/tsearch.sql.in
@@ -171,7 +171,7 @@ WHERE o.oprleft = t.oid and o.oprright=tq.oid
     and ( tq.typname='query_txt' or tq.typname='mquery_txt' );
  
  INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr)
-   SELECT opcl.oid, 1, false, c.opoid
+   SELECT opcl.oid, 1, true, c.opoid
     FROM pg_opclass opcl, txtidx_ops_tmp c
     WHERE
        opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist')
@@ -179,7 +179,7 @@ INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr)
        and c.oprname = '@@';
  
  INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr)
-   SELECT opcl.oid, 2, false, c.opoid
+   SELECT opcl.oid, 2, true, c.opoid
     FROM pg_opclass opcl, txtidx_ops_tmp c
     WHERE
        opcamid = (SELECT oid FROM pg_am WHERE amname = 'gist')
author	Tom Lane
	Thu, 7 Feb 2002 22:11:43 +0000 (22:11 +0000)
committer	Tom Lane
	Thu, 7 Feb 2002 22:11:43 +0000 (22:11 +0000)
contrib/intarray/_int.c		patch \| blob \| blame \| history
contrib/tsearch/README.tsearch		patch \| blob \| blame \| history
contrib/tsearch/gistidx.c		patch \| blob \| blame \| history
contrib/tsearch/tsearch.sql.in		patch \| blob \| blame \| history